- This Cookie Usage Policy explains how Decoding-Us.com (the "Website") uses cookies.
-
-
- Coverage statistics are calculated from sequencing reads aligned to the
- chm13v2.0/hs1 reference genome
- using BWA-MEM .
- A site is considered "callable" when it has at least four reads with high mapping confidence.
-
-
-Decoding Us will be a next-generation platform for citizen science focused on empowering individuals to
- contribute to genealogical and population studies using Whole Genome Sequencing (WGS) data and compatible
- technologies. Recognizing the limitations of centralized data silos and the growing importance of user data
- sovereignty, Decoding Us will be architected as a federated system, placing control and ownership firmly in
- the hands of the participants. This approach will foster greater trust, encourage broader participation,
- and unlock new possibilities for collaborative discovery in understanding our shared human history.
-
- This Privacy Statement explains how Decoding-Us.com (the "Website") handles the limited data processed during your interaction with our application, including the use of data from a federated platform.
-
-
- The Decoding Us Reputation System is designed to foster a high-quality, trustworthy community for genetic genealogy and research.
- It rewards positive contributions and protects the community from spam and abuse.
-
-
-
- Every user starts with a neutral reputation score. As you contribute to the platform, your score increases.
- Conversely, actions that harm the community may lower your score.
- Higher scores unlock advanced features, ensuring that powerful tools are used by trusted members.
-
-
-
- @if(build.nonEmpty) {
- No genome regions found for @build.get.
- } else {
- No genome regions found.
- }
-
-} else {
- No haplogroups found matching your criteria.
-} else {
-
- No matching variants found that aren't already associated with @haplogroupName.
-
- } else {
-
- Showing first 10 of @variants.size results. Refine your search for more specific results.
-
- }
- }
-} else {
- No variants associated with this haplogroup.
-} else {
- @defining(s"variants-$haplogroupId") { containerId =>
-
- @if(query.exists(_.trim.nonEmpty)) {
- No variants found matching "@query.get".
- } else {
- Enter a search term to find variants.
- }
-
-} else {
-
- @searchQuery match {
- case Some(q) => {
- @messages("publication.list.notFound", q)
- }
- case None => {
- @messages("publication.list.empty")
- }
- }
-
-} else {
-
- @for(details <- paginatedPublications.items) {
- @publicationDetails(details)
-
- }
-
-
-
-
-
-
-
@messagesProvider("support.myMessages.heading")
-
- @messagesProvider("support.newMessage")
-
-
-
- @views.html.fragments.flashMessages(request.flash)
-
- @if(messages.isEmpty) {
-
-
- @messagesProvider("support.myMessages.empty")
-
- } else {
-
- @for(msg <- messages) {
-
-
-
@msg.subject
- @msg.createdAt.toLocalDate
-
-
@msg.message
-
-
-
- @messagesProvider(s"support.status.${msg.status.value}")
-
-
-
-
- }
-
- }
-
-
-
-}
-
-@statusBadgeClass(status: MessageStatus) = @{
- status match {
- case MessageStatus.New => "bg-primary"
- case MessageStatus.Read => "bg-info"
- case MessageStatus.Replied => "bg-success"
- case MessageStatus.Closed => "bg-secondary"
- }
-}
diff --git a/app/views/terms.scala.html b/app/views/terms.scala.html
deleted file mode 100644
index 0b261cab..00000000
--- a/app/views/terms.scala.html
+++ /dev/null
@@ -1,15 +0,0 @@
-@import org.webjars.play.WebJarsUtil
-@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader)
-
-@main(messages("nav.terms")) {
-
- @messages("legal.terms.title")
- @{
- messages.lang.code match {
- case "es" => views.html.content.en.termsText()
- case "fr" => views.html.content.en.termsText()
- case _ => views.html.content.en.termsText()
- }
- }
-
-}
diff --git a/app/views/user/profile.scala.html b/app/views/user/profile.scala.html
deleted file mode 100644
index c3c2571e..00000000
--- a/app/views/user/profile.scala.html
+++ /dev/null
@@ -1,54 +0,0 @@
-@import org.webjars.play.WebJarsUtil
-@import models.domain.user.User
-@(profileForm: Form[controllers.ProfileFormData], user: User)(implicit request: Request[AnyContent], messages: Messages, webJarsUtil: WebJarsUtil)
-
-@main(messages("profile.title")) {
-
-
-
-
-
-
- @views.html.fragments.flashMessages(request.flash)
-
-
-
@messages("profile.accountDetails")
-
- @messages("profile.handle")
- @@@user.handle.getOrElse("-")
-
- @messages("profile.did")
- @user.did
-
-
-
-
-
-
@messages("profile.editProfile")
- @helper.form(action = routes.ProfileController.update) {
- @helper.CSRF.formField
-
-
-
@messages("profile.displayName")
-
-
@messages("profile.displayNameHelp")
- @profileForm("displayName").errors.map { error =>
-
@messages(error.message)
- }
-
-
-
- }
-
-
-
-
-
-}
diff --git a/app/views/variants/browser.scala.html b/app/views/variants/browser.scala.html
deleted file mode 100644
index b0dd9d45..00000000
--- a/app/views/variants/browser.scala.html
+++ /dev/null
@@ -1,68 +0,0 @@
-@import org.webjars.play.WebJarsUtil
-@(query: Option[String], page: Int, pageSize: Int)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil)
-
-@main(messages("variants.browser.title")) {
-
-
-
-
-
-
@messages("variants.browser.heading")
-
@messages("variants.browser.description")
-
-
-
-
-
-
-
-
-
-
- @views.html.fragments.searchInput(
- id = "variant-search",
- name = "query",
- value = query,
- placeholder = messages("variants.browser.searchPlaceholder"),
- hxGet = controllers.routes.VariantBrowserController.listFragment(None, 1, pageSize),
- hxTarget = "#variants-table"
- )
- @messages("variants.browser.searchHelp")
-
-
-
-
-
-
- Loading...
-
-
Loading variants...
-
-
-
-
-
-
-
-
-
-
-
-
@messages("variants.browser.selectVariant")
-
-
-
-
-
-
-}
diff --git a/app/views/variants/detailPanel.scala.html b/app/views/variants/detailPanel.scala.html
deleted file mode 100644
index 915847ba..00000000
--- a/app/views/variants/detailPanel.scala.html
+++ /dev/null
@@ -1,98 +0,0 @@
-@import models.domain.genomics.{NamingStatus, VariantV2}
-@import models.domain.haplogroups.Haplogroup
-@import play.api.libs.json.JsObject
-@import utils.VariantViewUtils
-@(variant: VariantV2, haplogroups: Seq[Haplogroup])(implicit request: RequestHeader, messages: Messages)
-
-
-
-
-
- @messages("variants.detail.rsId")
-
- @variant.rsIds.headOption.map { rs =>
- @rs
- }.getOrElse("-")
-
-
- @messages("variants.detail.commonName")
- @variant.canonicalName.getOrElse("-")
-
- @defining(VariantViewUtils.primaryAlleles(variant)) { alleles =>
- @messages("variants.detail.ancestral")
- @alleles._1
-
- @messages("variants.detail.derived")
- @alleles._2
- }
-
- @messages("variants.detail.type")
- @variant.mutationType.displayName
-
- @messages("variants.detail.status")
-
-
- @variant.namingStatus.displayName
-
-
-
-
- @if(variant.commonNames.nonEmpty || variant.rsIds.size > 1) {
-
-
@messages("variants.detail.altNames")
-
- @if(variant.commonNames.nonEmpty) {
-
- @messages("variants.detail.aliasType.snpNames"):
- @for(name <- variant.commonNames) {
- @name
- }
-
- }
- @if(variant.rsIds.nonEmpty) {
-
-
@messages("variants.detail.aliasType.dbsnp"):
- @for(rsId <- variant.rsIds) {
-
@rsId
- }
-
- }
-
- }
-
-
-
-
@messages("variants.detail.refBuilds")
- @views.html.fragments.variant.referenceBuilds(variant)
-
-
-
-
@messages("variants.detail.usedBy")
- @if(haplogroups.isEmpty) {
-
@messages("variants.detail.noHaplogroups")
- } else {
-
- @for(hg <- haplogroups.take(10)) {
- @defining(if(hg.haplogroupType.toString == "Y") controllers.routes.TreeController.ytree(Some(hg.name)).url else controllers.routes.TreeController.mtree(Some(hg.name)).url) { treeUrl =>
-
- @hg.name
-
- }
- }
- @if(haplogroups.size > 10) {
-
+@(haplogroups.size - 10) @messages("variants.detail.more")
- }
-
- }
-
- @variant.notes.map { notes =>
-
-
@messages("variants.detail.notes")
-
@notes
- }
-
-
\ No newline at end of file
diff --git a/app/views/variants/listFragment.scala.html b/app/views/variants/listFragment.scala.html
deleted file mode 100644
index ae8ff090..00000000
--- a/app/views/variants/listFragment.scala.html
+++ /dev/null
@@ -1,85 +0,0 @@
-@import models.domain.genomics.VariantV2
-@import utils.VariantViewUtils
-@(variants: Seq[VariantV2], query: Option[String], currentPage: Int, totalPages: Int, pageSize: Int, totalCount: Int)(implicit request: RequestHeader, messages: Messages)
-
-@defining(java.text.NumberFormat.getIntegerInstance()) { nf =>
-
-
- @if(query.exists(_.trim.nonEmpty)) {
- @messages("variants.browser.foundMatching", nf.format(totalCount), query.get)
- } else {
- @messages("variants.browser.showingTotal", nf.format(totalCount))
- }
-
- @if(totalPages > 1) {
- @messages("variants.browser.pageOf", currentPage, nf.format(totalPages))
- }
-
-}
-
-@if(variants.isEmpty) {
-
- @if(query.exists(_.trim.nonEmpty)) {
- @messages("variants.browser.noResults", query.get)
- } else {
- @messages("variants.browser.enterSearch")
- }
-
-} else {
-
-
-
-
- @messages("variants.browser.col.name")
- @messages("variants.browser.col.alleles")
- @messages("variants.browser.col.type")
- @messages("variants.browser.col.builds")
-
-
-
- @for(variant <- variants) {
- @defining(VariantViewUtils.primaryAlleles(variant)) { alleles =>
- @defining(VariantViewUtils.refGenomes(variant)) { refs =>
-
-
- @variant.displayName
- @variant.rsIds.headOption.filter(_ != variant.canonicalName.getOrElse("")).map { rsId =>
- @rsId
- }
-
-
- @alleles._1
- @if(alleles._2 != "?") {
- @alleles._2
- }
-
-
- @variant.mutationType.displayName
-
-
- @for(refGenome <- refs) {
-
- @VariantViewUtils.shortRefGenome(refGenome)
-
- }
-
-
- }
- }
- }
-
-
-
-
- @views.html.fragments.pagination(
- currentPage,
- totalPages,
- p => controllers.routes.VariantBrowserController.listFragment(query, p, pageSize),
- "#variants-table",
- "#variant-search"
- )
-}
\ No newline at end of file
diff --git a/app/views/ytree.scala.html b/app/views/ytree.scala.html
deleted file mode 100644
index 4dde8472..00000000
--- a/app/views/ytree.scala.html
+++ /dev/null
@@ -1,13 +0,0 @@
-@(rootHaplogroup: Option[String], showVerticalTree: Boolean = false)(implicit webJarsUtil: org.webjars.play.WebJarsUtil, messages: Messages, request: RequestHeader)
-
-@import controllers.routes.TreeController
-
-@fragments.treeView(
- titleKey = "nav.ytree",
- fragmentCall = TreeController.yTreeFragment,
- fullPageCall = TreeController.ytree,
- rootHaplogroup = rootHaplogroup,
- searchPlaceholder = "e.g. R-M269, I-M253...",
- searchInputTitle = "Enter a haplogroup name (e.g. R-M269)",
- showVerticalTree = showVerticalTree
-)
\ No newline at end of file
diff --git a/build.sbt b/build.sbt
deleted file mode 100644
index 649eb376..00000000
--- a/build.sbt
+++ /dev/null
@@ -1,67 +0,0 @@
-name := """decodingus"""
-
-version := "1.0-SNAPSHOT"
-
-lazy val root = (project in file(".")).enablePlugins(PlayScala)
-
-scalaVersion := "3.3.6"
-
-val SLICK_VERSION = "6.2.0"
-val SLICK_PG_VERSION = "0.23.1"
-val TAPIR_VERSION = "1.11.50"
-val AWS_VERSION = "2.40.3"
-
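-// WARNING: the quartz scheduler (pekko-quartz-scheduler 1.3.0-pekko-1.1.x) constrains this version. This note originally said updating beyond 1.1.2 causes startup errors, yet 1.1.5 is pinned below -- TODO confirm the actual ceiling.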
-val APACHE_PEKKO_VERSION = "1.1.5"
-
-scalacOptions ++= Seq("-Xmax-inlines", "128")
-
-libraryDependencies ++= Seq(
- guice,
- caffeine,
- "org.scala-lang.modules" %% "scala-xml" % "2.4.0",
- "org.playframework" %% "play-slick" % SLICK_VERSION,
- "org.playframework" %% "play-slick-evolutions" % SLICK_VERSION,
- "org.postgresql" % "postgresql" % "42.7.8",
- "com.github.tminglei" %% "slick-pg" % SLICK_PG_VERSION,
- "com.github.tminglei" %% "slick-pg_jts" % SLICK_PG_VERSION,
- "com.github.tminglei" %% "slick-pg_play-json" % SLICK_PG_VERSION,
- "org.webjars" %% "webjars-play" % "3.0.9",
- "org.webjars" % "bootstrap" % "5.3.8",
- "org.webjars" % "popper.js" % "2.11.7",
- "org.webjars.npm" % "htmx.org" % "2.0.8",
- "org.scalatestplus.play" %% "scalatestplus-play" % "7.0.2" % Test,
- "com.h2database" % "h2" % "2.4.240" % Test,
- "org.codehaus.janino" % "janino" % "3.1.12",
- "com.nappin" %% "play-recaptcha" % "3.0",
-
- // Core Tapir libraries
- "com.softwaremill.sttp.tapir" %% "tapir-core" % TAPIR_VERSION,
- "com.softwaremill.sttp.tapir" %% "tapir-json-play" % TAPIR_VERSION,
-
- // Play server interpreter
- "com.softwaremill.sttp.tapir" %% "tapir-play-server" % TAPIR_VERSION,
-
- // OpenAPI / Swagger UI generation
- "com.softwaremill.sttp.tapir" %% "tapir-swagger-ui-bundle" % TAPIR_VERSION,
-
- "io.github.samueleresca" %% "pekko-quartz-scheduler" % "1.3.0-pekko-1.1.x",
-
- "org.apache.pekko" %% "pekko-protobuf-v3" % APACHE_PEKKO_VERSION,
- "org.apache.pekko" %% "pekko-serialization-jackson" % APACHE_PEKKO_VERSION,
- "org.apache.pekko" %% "pekko-stream" % APACHE_PEKKO_VERSION,
- "org.apache.pekko" %% "pekko-actor-typed" % APACHE_PEKKO_VERSION,
- "org.apache.pekko" %% "pekko-slf4j" % APACHE_PEKKO_VERSION,
-
- "software.amazon.awssdk" % "secretsmanager" % AWS_VERSION,
- "software.amazon.awssdk" % "ses" % AWS_VERSION,
- "org.hashids" % "hashids" % "1.0.3",
- "org.mindrot" % "jbcrypt" % "0.4", // BCrypt for password hashing
- "com.github.samtools" % "htsjdk" % "4.3.0",
- "org.scalatestplus" %% "mockito-5-10" % "3.2.18.0" % Test
-)
-
-// Code Coverage Configuration
-coverageMinimumStmtTotal := 5
-coverageFailOnMinimum := true
-coverageHighlighting := true
diff --git a/conf/application.conf b/conf/application.conf
deleted file mode 100644
index 16f05ea6..00000000
--- a/conf/application.conf
+++ /dev/null
@@ -1,261 +0,0 @@
-# https://www.playframework.com/documentation/latest/Configuration
-
-play.http.secret.key="changeme"
-play.http.secret.key=${?APPLICATION_SECRET}
-
-play.i18n {
- langs = [ "en", "fr", "es" ]
-}
-
-# Session cookie security
-play.http.session.httpOnly = true
-play.http.session.secure = true
-play.http.session.secure = ${?SESSION_SECURE}
-play.http.session.sameSite = "lax"
-
-# Increase max request body size for tree merge API (default is 100KB)
-play.http.parser.maxMemoryBuffer = 10MB
-
-# Disable the startup banner
-play.application.showBanner=false
-
-play.modules.enabled += "modules.BaseModule"
-play.modules.enabled += "modules.ServicesModule"
-
-play.modules.enabled += "modules.RecaptchaModule"
-play.modules.enabled += "modules.StartupModule"
-play.modules.enabled += "modules.ApplicationModule"
-play.modules.enabled += "modules.ApiSecurityModule"
-play.modules.enabled += "modules.PDSRegistrationModule"
-
-play.cache.caffeine {
- # Default cache configuration
- default = {
- initial-capacity = 100
- maximum-size = 10000
- expire-after-write = 24h
- }
- # Specific cache configurations if needed
- sitemap = ${play.cache.caffeine.default}
-}
-
-# Tell Play to use Caffeine instead of the default EhCache
-play.cache.createBoundCaches = false
-play.modules.enabled += "play.api.cache.caffeine.CaffeineCacheModule"
-
-recaptcha {
- enable = false
- enable = ${?ENABLE_RECAPTCHA}
-
- # Your existing recaptcha configuration here
- privateKey = "your-secret-key"
- privateKey = ${?RECAPTCHA_SECRET_KEY}
- publicKey = "your-site-key"
- publicKey = ${?RECAPTCHA_SITE_KEY}
-}
-
-# Run evolution statements with autocommit instead of one wrapping transaction (this setting does not by itself auto-apply evolutions); review before production use
-play.evolutions.autocommit=true
-
-slick.dbs.default {
- profile="slick.jdbc.PostgresProfile$"
- db {
- numThreads = 32
- queueSize = 5000
-
- driver="org.postgresql.Driver"
- url="jdbc:postgresql://localhost:5432/decodingus_db"
- # Local Development Credentials
- username="decoding_us_user"
- # Overridden in production on startup
- password="user_password"
- }
-}
-
-slick.dbs.metadata {
- profile="slick.jdbc.PostgresProfile$"
- db {
- numThreads = 32
- queueSize = 5000
-
- driver="org.postgresql.Driver"
- url="jdbc:postgresql://localhost:5432/decodingus_metadata" # NOTE(review): separate database from slick.dbs.default (decodingus_db) on the same local server
- username="decodingus_user"
- password="decodingus_password"
- }
-}
-
-contact {
- recipient.email = "your-email@domain.com"
- recipient.email = ${?CONTACT_RECIPIENT_EMAIL} # Can be overridden by environment variable
-}
-
-# AT Protocol / PDS configuration
-atproto {
- client.timeout = 5000
- client.timeout = ${?ATPROTO_CLIENT_TIMEOUT}
- plc.directory = "https://plc.directory"
- plc.directory = ${?ATPROTO_PLC_DIRECTORY}
-}
-
-# PDS Edge node authentication
-pds.auth {
- timestamp.window.seconds = 300
- timestamp.window.seconds = ${?PDS_AUTH_TIMESTAMP_WINDOW}
-}
-
-pekko {
- loglevel = "DEBUG"
- stdout-loglevel = "DEBUG"
- jvm-exit-on-fatal-error = off
-
- quartz.schedules {
- # Publication Updater: Run every two weeks
- PublicationUpdater {
- # CRON expression: twice a month (roughly every two weeks) at midnight UTC
- # This will trigger at 00:00:00 UTC on the 1st and 15th of every month.
- expression = "0 0 0 1,15 * ?"
- timezone = "UTC"
- description = "Refresh publication data from OpenAlex every two weeks"
- }
-
- PublicationDiscovery {
- # Run weekly on Sunday at 2 AM UTC
- expression = "0 0 2 ? * SUN"
- timezone = "UTC"
- description = "Discover new publications via OpenAlex"
- }
-
- YBrowseVariantUpdate {
- # Run weekly on Monday at 3 AM UTC
- expression = "0 0 3 ? * MON"
- timezone = "UTC"
- description = "Download and ingest Y-DNA SNP data from YBrowse"
- }
-
- VariantExport {
- # Run daily at 4 AM UTC (after YBrowse update on Mondays)
- expression = "0 0 4 * * ?"
- timezone = "UTC"
- description = "Generate full variant export file for Edge App"
- }
-
- MatchDiscovery {
- # Run daily at 3 AM UTC
- expression = "0 0 3 * * ?"
- timezone = "UTC"
- description = "Compute population overlap scores and generate match suggestions"
- }
- }
-}
-
-decodingus.matching {
- discovery {
- shared-match-threshold = 2
- population-overlap-threshold = 0.6
- suggestion-expiry-days = 90
- max-suggestions-per-user = 100
- }
-
- requests {
- default-expiry-days = 30
- max-pending-requests = 50
- consent-expiry-days = 365
- }
-
- relay {
- session-timeout-minutes = 10
- max-concurrent-sessions = 100
- stale-cleanup-interval-seconds = 60
- }
-}
-
-openalex.mailToEmail="jkane@tds.net"
-
-aws {
- region = "us-east-1"
- secrets {
- apiKey {
- name = "your-secret-name"
- }
- userEncryptionKey {
- name = "your-user-encryption-key-secret-name"
- }
- }
-}
-
-biosample.hash.salt = "your-biosample-salt"
-biosample.hash.salt = ${?BIOSAMPLE_HASH_SALT}
-
-# Feature flags - enable/disable features in development
-features {
- tree {
- # Show branch age estimates (Formed/TMRCA) on tree nodes
- # Disabled until age data is populated
- showBranchAgeEstimates = false
- showBranchAgeEstimates = ${?FEATURE_SHOW_BRANCH_AGE_ESTIMATES}
- }
-}
-
-genomics {
- references {
- # Canonical names for supported linear reference builds (must match database reference_genome values)
- # Using short names without patch versions: GRCh37, GRCh38, hs1
- # hs1 is UCSC's naming convention for T2T-CHM13v2.0
- supported = ["GRCh37", "GRCh38", "hs1"]
-
- # Aliases to map common/legacy names to canonical names
- aliases {
- "hg19" = "GRCh37"
- "hg38" = "GRCh38"
- # Legacy patch versions (for backwards compatibility with old data)
- "GRCh37.p13" = "GRCh37"
- "GRCh38.p14" = "GRCh38"
- # T2T-CHM13 aliases (UCSC uses hs1)
- "chm13" = "hs1"
- "chm13v2.0" = "hs1"
- "T2T-CHM13" = "hs1"
- "T2T-CHM13v2.0" = "hs1"
- }
-
- # Paths to reference genome FASTA files
- fasta_paths {
- "GRCh37" = "/home/jkane/Genomics/Reference/b37/human_g1k_v37.fasta.gz"
- "GRCh38" = "/home/jkane/Genomics/Reference/b38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
- "hs1" = "/home/jkane/Genomics/Reference/chm13v2.0/chm13v2.0.fa.gz"
- }
- }
-
- liftover {
- # Chain files for coordinate conversion
- # Format: "Source->Target" = "path/to/chain/file"
- chains {
- "GRCh38->GRCh37" = "/mnt/md0/genomics/chains/hg38ToHg19.over.chain.gz"
- "GRCh38->hs1" = "/mnt/md0/genomics/chains/hg38ToHs1.over.chain.gz"
- "hs1->GRCh38" = "/mnt/md0/genomics/chains/hs1ToHg38.over.chain.gz"
- "hs1->GRCh37" = "/mnt/md0/genomics/chains/hs1ToHg19.over.chain.gz"
- }
- }
-
- ybrowse {
- # URL to download the YBrowse Y-DNA SNP GFF3 file
- gff_url = "https://ybrowse.org/gbrowse2/gff/snps_hg38.gff3"
- gff_url = ${?YBROWSE_GFF_URL}
-
- # Local storage path for downloaded GFF file
- gff_storage_path = "/mnt/md0/decodingus/ybrowse/snps_hg38.gff3"
- gff_storage_path = ${?YBROWSE_GFF_STORAGE_PATH}
- }
-
- hipstr {
- # URL to download the HipSTR STR reference BED file (GRCh38)
- url = "https://github.com/HipSTR-Tool/HipSTR-references/raw/master/human/GRCh38.hipstr_reference.bed.gz"
- url = ${?HIPSTR_URL}
-
- # Local storage path for downloaded BED file
- storage_path = "/mnt/md0/decodingus/hipstr/GRCh38.hipstr_reference.bed.gz"
- storage_path = ${?HIPSTR_STORAGE_PATH}
- }
-}
-
-
diff --git a/conf/application.test.conf b/conf/application.test.conf
deleted file mode 100644
index 792ab6f5..00000000
--- a/conf/application.test.conf
+++ /dev/null
@@ -1,35 +0,0 @@
-include "application.conf"
-
-# Override database to use H2 in-memory for tests
-slick.dbs.default {
- profile = "slick.jdbc.H2Profile$"
- db {
- driver = "org.h2.Driver"
- url = "jdbc:h2:mem:test;MODE=PostgreSQL;DATABASE_TO_UPPER=FALSE;DB_CLOSE_DELAY=-1"
- username = "sa"
- password = ""
- numThreads = 2
- queueSize = 100
- }
-}
-
-slick.dbs.metadata {
- profile = "slick.jdbc.H2Profile$"
- db {
- driver = "org.h2.Driver"
- url = "jdbc:h2:mem:test_metadata;MODE=PostgreSQL;DATABASE_TO_UPPER=FALSE;DB_CLOSE_DELAY=-1"
- username = "sa"
- password = ""
- numThreads = 2
- queueSize = 100
- }
-}
-
-# Disable evolutions for tests
-play.evolutions.enabled = false
-
-# Disable startup services that hit the database
-play.modules.disabled += "modules.StartupModule"
-
-# Disable scheduled jobs
-pekko.quartz.schedules = {}
diff --git a/conf/evolutions/default/1.sql b/conf/evolutions/default/1.sql
deleted file mode 100644
index af893fbf..00000000
--- a/conf/evolutions/default/1.sql
+++ /dev/null
@@ -1,305 +0,0 @@
-# --- !Ups
-CREATE EXTENSION IF NOT EXISTS postgis;
-
-CREATE TABLE specimen_donor
-(
- id SERIAL PRIMARY KEY,
- donor_identifier VARCHAR(255) NOT NULL,
- origin_biobank VARCHAR(255) NOT NULL
-);
-
-CREATE TABLE biosample
-(
- id SERIAL PRIMARY KEY,
- sample_accession VARCHAR(255) UNIQUE NOT NULL,
- description TEXT NOT NULL,
- alias VARCHAR(255),
- center_name VARCHAR(255) NOT NULL,
- sex VARCHAR(15) CHECK (sex IN ('male', 'female', 'intersex')),
- geocoord GEOMETRY(Point, 4326),
- specimen_donor_id INT REFERENCES specimen_donor (id) ON DELETE CASCADE,
- sample_guid UUID NOT NULL
-);
-
-CREATE TABLE citizen_biosample
-(
- id SERIAL PRIMARY KEY,
- citizen_biosample_did VARCHAR(255) UNIQUE,
- source_platform VARCHAR(255),
- collection_date DATE,
- sex VARCHAR(15) CHECK (sex IN ('male', 'female', 'intersex')),
- geocoord GEOMETRY(Point, 4326),
- description TEXT,
- sample_guid UUID NOT NULL
-);
-
-CREATE TABLE pgp_biosample
-(
- pgp_biosample_id SERIAL PRIMARY KEY,
- pgp_participant_id VARCHAR(255) NOT NULL,
- ena_biosample_accession VARCHAR(255) UNIQUE,
- sex VARCHAR(15) CHECK (sex IN ('male', 'female', 'intersex')),
- sample_guid UUID NOT NULL
-);
-
-CREATE TABLE haplogroup
-(
- haplogroup_id SERIAL PRIMARY KEY,
- name VARCHAR(255) NOT NULL,
- lineage VARCHAR(255),
- description TEXT,
- haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')),
- revision_id INTEGER NOT NULL,
- source VARCHAR(255) NOT NULL,
- confidence_level VARCHAR(255) NOT NULL,
- valid_from TIMESTAMP NOT NULL,
- valid_until TIMESTAMP,
- unique (name)
-);
-
-CREATE TABLE haplogroup_relationship
-(
- haplogroup_relationship_id SERIAL PRIMARY KEY,
- child_haplogroup_id INTEGER NOT NULL,
- parent_haplogroup_id INTEGER NOT NULL,
- revision_id INTEGER NOT NULL,
- valid_from TIMESTAMP NOT NULL,
- valid_until TIMESTAMP,
- source VARCHAR(255) NOT NULL,
- FOREIGN KEY (child_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE,
- FOREIGN KEY (parent_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE,
- UNIQUE (child_haplogroup_id, revision_id)
-);
-
-CREATE TABLE genbank_contig
-(
- genbank_contig_id SERIAL PRIMARY KEY,
- accession VARCHAR(255) NOT NULL,
- common_name VARCHAR(255),
- reference_genome VARCHAR(255),
- seq_length INT NOT NULL,
- UNIQUE (accession)
-);
-
-CREATE TABLE variant
-(
- variant_id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL,
- position INTEGER NOT NULL,
- reference_allele VARCHAR(255) NOT NULL,
- alternate_allele VARCHAR(255) NOT NULL,
- variant_type VARCHAR(5) NOT NULL CHECK (variant_type IN ('SNP', 'INDEL')),
- rs_id VARCHAR(255),
- common_name VARCHAR(255),
- FOREIGN KEY (genbank_contig_id) REFERENCES genbank_contig (genbank_contig_id) ON DELETE CASCADE,
- UNIQUE (genbank_contig_id, position, reference_allele, alternate_allele)
-);
-
-CREATE TABLE haplogroup_variant
-(
- haplogroup_variant_id SERIAL PRIMARY KEY,
- haplogroup_id INT NOT NULL,
- variant_id INT NOT NULL,
- FOREIGN KEY (haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE,
- FOREIGN KEY (variant_id) REFERENCES variant (variant_id) ON DELETE CASCADE,
- UNIQUE (haplogroup_id, variant_id)
-);
-
-CREATE TABLE biosample_haplogroup
-(
- sample_guid UUID NOT NULL,
- y_haplogroup_id INT,
- mt_haplogroup_id INT,
- FOREIGN KEY (y_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE,
- FOREIGN KEY (mt_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE,
- PRIMARY KEY (sample_guid)
-);
-
-CREATE TABLE analysis_method
-(
- analysis_method_id SERIAL PRIMARY KEY,
- method_name VARCHAR(255) NOT NULL UNIQUE
-);
-
-CREATE TABLE population
-(
- population_id SERIAL PRIMARY KEY,
- population_name VARCHAR(255) NOT NULL UNIQUE
- -- parent_population_id BIGINT REFERENCES population(population_id) -- Uncomment if needed
-);
-
-CREATE TABLE ancestry_analysis
-(
- ancestry_analysis_id SERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL,
- analysis_method_id INT NOT NULL,
- population_id INT NOT NULL,
- probability DECIMAL(5, 4),
- FOREIGN KEY (analysis_method_id) REFERENCES analysis_method (analysis_method_id) ON DELETE CASCADE,
- FOREIGN KEY (population_id) REFERENCES population (population_id) ON DELETE CASCADE,
- UNIQUE (sample_guid, analysis_method_id, population_id)
-);
-
-CREATE TABLE sequence_library
-(
- id SERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL,
- lab VARCHAR(255) NOT NULL,
- test_type VARCHAR(255) NOT NULL,
- run_date TIMESTAMP NOT NULL,
- instrument VARCHAR(255) NOT NULL,
- reads BIGINT NOT NULL,
- read_length INTEGER NOT NULL,
- paired_end BOOLEAN NOT NULL,
- insert_size INTEGER,
- created_at TIMESTAMP NOT NULL,
- updated_at TIMESTAMP
-);
-
-CREATE TABLE sequence_file
-(
- id SERIAL PRIMARY KEY,
- library_id INT NOT NULL,
- file_name VARCHAR(255) NOT NULL,
- file_size_bytes BIGINT NOT NULL,
- file_md5 VARCHAR(255) NOT NULL,
- file_format VARCHAR(255) NOT NULL,
- aligner VARCHAR(255) NOT NULL,
- target_reference VARCHAR(255) NOT NULL,
- created_at TIMESTAMP NOT NULL,
- updated_at TIMESTAMP,
- FOREIGN KEY (library_id) REFERENCES sequence_library (id) ON DELETE CASCADE
-);
-
-CREATE TABLE sequence_http_location
-(
- id SERIAL PRIMARY KEY,
- sequence_file_id INT NOT NULL,
- file_url TEXT NOT NULL,
- file_index_url TEXT,
- FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE
-);
-
-CREATE TABLE sequence_atp_location
-(
- id SERIAL PRIMARY KEY,
- sequence_file_id INT NOT NULL,
- repo_did VARCHAR(255) NOT NULL,
- record_cid VARCHAR(255) NOT NULL,
- record_path TEXT NOT NULL,
- index_did VARCHAR(255),
- index_cid VARCHAR(255),
- FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE
-);
-
-CREATE TABLE quality_metrics
-(
- id SERIAL PRIMARY KEY,
- contig_id INT NOT NULL,
- start_pos INT NOT NULL,
- end_pos INT NOT NULL,
- num_reads INT NOT NULL,
- ref_n INT NOT NULL,
- no_cov INT NOT NULL,
- low_cov INT NOT NULL,
- excessive_cov INT NOT NULL,
- poor_mq INT NOT NULL,
- callable INT NOT NULL,
- cov_percent DOUBLE PRECISION NOT NULL,
- mean_depth DOUBLE PRECISION NOT NULL,
- mean_mq DOUBLE PRECISION NOT NULL,
- sequence_file_id BIGINT NOT NULL,
- FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE,
- FOREIGN KEY (contig_id) REFERENCES genbank_contig (genbank_contig_id) ON DELETE CASCADE
-);
-
-CREATE TABLE reported_variant
-(
- id BIGSERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL,
- contig_id INT NOT NULL,
- position INT NOT NULL,
- reference_allele VARCHAR(255) NOT NULL,
- alternate_allele VARCHAR(255) NOT NULL,
- variant_type VARCHAR(5) NOT NULL CHECK (variant_type IN ('SNP', 'INDEL')),
- reported_date TIMESTAMP NOT NULL,
- provenance VARCHAR(255) NOT NULL,
- confidence_score DOUBLE PRECISION NOT NULL,
- notes TEXT,
- status VARCHAR(255) NOT NULL,
- FOREIGN KEY (contig_id) REFERENCES genbank_contig (genbank_contig_id) ON DELETE CASCADE
-);
-
-CREATE TABLE reported_negative_variant
-(
- id BIGSERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL,
- variant_id INT NOT NULL,
- reported_date TIMESTAMP,
- notes TEXT,
- status VARCHAR(255) NOT NULL,
- FOREIGN KEY (variant_id) REFERENCES variant (variant_id) ON DELETE CASCADE
-);
-
-CREATE TABLE publication
-(
- id SERIAL PRIMARY KEY,
- pubmed_id VARCHAR(20) UNIQUE,
- doi VARCHAR(255) UNIQUE,
- title TEXT NOT NULL,
- journal VARCHAR(255),
- publication_date DATE,
- url VARCHAR(2048)
-);
-
-CREATE TABLE ena_study
-(
- id SERIAL PRIMARY KEY,
- accession VARCHAR(50) UNIQUE NOT NULL,
- title VARCHAR(255) NOT NULL,
- center_name VARCHAR(255) NOT NULL,
- study_name VARCHAR(255) NOT NULL,
- details TEXT
-);
-
-CREATE TABLE publication_ena_study
-(
- publication_id INT,
- ena_study_id INT,
- FOREIGN KEY (publication_id) REFERENCES publication (id),
- FOREIGN KEY (ena_study_id) REFERENCES ena_study (id),
- PRIMARY KEY (publication_id, ena_study_id)
-);
-
-CREATE TABLE publication_biosample
-(
- publication_id INT REFERENCES publication (id) ON DELETE CASCADE,
- biosample_id INT REFERENCES biosample (id) ON DELETE CASCADE,
- PRIMARY KEY (publication_id, biosample_id)
-);
-
-# --- !Downs
-DROP TABLE publication_biosample;
-DROP TABLE publication_ena_study;
-DROP TABLE ena_study;
-DROP TABLE publication;
-DROP TABLE reported_negative_variant;
-DROP TABLE reported_variant;
-DROP TABLE quality_metrics;
-DROP TABLE sequence_atp_location;
-DROP TABLE sequence_http_location;
-DROP TABLE sequence_file;
-DROP TABLE sequence_library;
-DROP TABLE ancestry_analysis;
-DROP TABLE population;
-DROP TABLE analysis_method;
-DROP TABLE biosample_haplogroup;
-DROP TABLE haplogroup_variant;
-DROP TABLE variant;
-DROP TABLE genbank_contig;
-DROP TABLE haplogroup_relationship;
-DROP TABLE haplogroup;
-DROP TABLE pgp_biosample;
-DROP TABLE citizen_biosample;
-DROP TABLE biosample;
-DROP TABLE specimen_donor;
diff --git a/conf/evolutions/default/10.sql b/conf/evolutions/default/10.sql
deleted file mode 100644
index cae6ebef..00000000
--- a/conf/evolutions/default/10.sql
+++ /dev/null
@@ -1,33 +0,0 @@
-# --- !Ups
-
--- Add new columns to the public.publication table from OpenAlex integration
-ALTER TABLE public.publication
- ADD COLUMN open_alex_id VARCHAR(255),
- ADD COLUMN citation_normalized_percentile REAL,
- ADD COLUMN cited_by_count INTEGER,
- ADD COLUMN open_access_status VARCHAR(50),
- ADD COLUMN open_access_url VARCHAR(2048),
- ADD COLUMN primary_topic VARCHAR(255),
- ADD COLUMN publication_type VARCHAR(50),
- ADD COLUMN publisher VARCHAR(255);
-
--- Add unique constraint for open_alex_id if it's guaranteed to be unique
--- ALTER TABLE public.publication ADD CONSTRAINT publication_open_alex_id_uk UNIQUE (open_alex_id);
--- Note: It's often safer to add unique constraints after populating existing NULLs
--- if you have existing rows where open_alex_id would be NULL, as NULL is not unique.
--- If you insert new data, you might add this in a later evolution or use a unique index
--- that allows NULLs (e.g., CREATE UNIQUE INDEX ON table (col) WHERE col IS NOT NULL; in Postgres).
--- For now, if your model allows Option[String], the database column should allow NULL.
-
-# --- !Downs
-
--- Revert changes: Drop the columns added in this evolution
-ALTER TABLE public.publication
- DROP COLUMN publisher,
- DROP COLUMN publication_type,
- DROP COLUMN primary_topic,
- DROP COLUMN open_access_url,
- DROP COLUMN open_access_status,
- DROP COLUMN cited_by_count,
- DROP COLUMN citation_normalized_percentile,
- DROP COLUMN open_alex_id;
\ No newline at end of file
diff --git a/conf/evolutions/default/11.sql b/conf/evolutions/default/11.sql
deleted file mode 100644
index cb904040..00000000
--- a/conf/evolutions/default/11.sql
+++ /dev/null
@@ -1,15 +0,0 @@
--- !Ups
--- Add a "locked" flag so that batch updates from the source cannot overwrite manual corrections
-ALTER TABLE biosample ADD COLUMN locked boolean;
-
--- Backfill every existing row as unlocked (the UPDATE has no WHERE clause).
--- NOTE(review): no row is locked here, not even samples whose sex or geocoord was set manually
-UPDATE biosample
-SET locked = false;
-
--- With no NULLs left, enforce NOT NULL and make new rows default to unlocked
-ALTER TABLE biosample ALTER COLUMN locked SET NOT NULL;
-ALTER TABLE biosample ALTER COLUMN locked SET DEFAULT false;
-
--- !Downs
-ALTER TABLE biosample DROP COLUMN locked;
\ No newline at end of file
diff --git a/conf/evolutions/default/12.sql b/conf/evolutions/default/12.sql
deleted file mode 100644
index e4c61254..00000000
--- a/conf/evolutions/default/12.sql
+++ /dev/null
@@ -1,59 +0,0 @@
--- !Ups
--- Generalise ena_study into genomic_studies so that studies from sources other than ENA fit
-ALTER TABLE ena_study RENAME TO genomic_studies;
-
-ALTER TABLE genomic_studies
- ADD COLUMN source VARCHAR(10), -- NOTE(review): narrower than 'NCBI_BIOPROJECT' / 'NCBI_GENBANK' allowed below
- ADD COLUMN submission_date DATE,
- ADD COLUMN last_update DATE,
- ADD COLUMN bio_project_id VARCHAR(50),
- ADD COLUMN molecule VARCHAR(50),
- ADD COLUMN topology VARCHAR(50),
- ADD COLUMN taxonomy_id INTEGER,
- ADD COLUMN version VARCHAR(10);
-
--- Backfill: every study that existed before this evolution is tagged as coming from ENA
-UPDATE genomic_studies
-SET source = 'ENA';
-
--- All rows now have a source, so the NOT NULL constraint can be added
-ALTER TABLE genomic_studies
- ALTER COLUMN source SET NOT NULL;
-
--- Restrict source to the known values with a CHECK constraint (not a database enum type)
-ALTER TABLE genomic_studies
- ADD CONSTRAINT valid_source CHECK (source IN ('ENA', 'NCBI_BIOPROJECT', 'NCBI_GENBANK'));
-
--- Rename the link-table column to match (the publication_ena_study table itself keeps its name)
-ALTER TABLE publication_ena_study
- RENAME COLUMN ena_study_id TO genomic_study_id;
-
--- Rename the foreign key constraint to match. There is no IF EXISTS here: it must already exist
-ALTER TABLE publication_ena_study
- RENAME CONSTRAINT publication_ena_study_ena_study_id_fkey
- TO publication_ena_study_genomic_study_id_fkey;
-
-
--- !Downs
-
--- Revert the foreign key constraint rename
-ALTER TABLE publication_ena_study
- RENAME CONSTRAINT publication_ena_study_genomic_study_id_fkey
- TO publication_ena_study_ena_study_id_fkey;
-
--- Revert the column rename
-ALTER TABLE publication_ena_study
- RENAME COLUMN genomic_study_id TO ena_study_id;
--- Drop the columns added by Ups (dropping source also removes the valid_source CHECK on it)
-ALTER TABLE genomic_studies
- DROP COLUMN source,
- DROP COLUMN submission_date,
- DROP COLUMN last_update,
- DROP COLUMN bio_project_id,
- DROP COLUMN molecule,
- DROP COLUMN topology,
- DROP COLUMN taxonomy_id,
- DROP COLUMN version;
-
-ALTER TABLE genomic_studies RENAME TO ena_study;
-
diff --git a/conf/evolutions/default/13.sql b/conf/evolutions/default/13.sql
deleted file mode 100644
index d7b2b002..00000000
--- a/conf/evolutions/default/13.sql
+++ /dev/null
@@ -1,5 +0,0 @@
--- !Ups
-ALTER TABLE genomic_studies ALTER COLUMN source TYPE varchar(20) /* room for 'NCBI_BIOPROJECT' (15 chars), which the valid_source CHECK allows */;
-
--- !Downs
-ALTER TABLE genomic_studies ALTER COLUMN source TYPE varchar(10) /* NOTE(review): errors if any stored source is longer than 10 chars */;
\ No newline at end of file
diff --git a/conf/evolutions/default/14.sql b/conf/evolutions/default/14.sql
deleted file mode 100644
index 719ffbfb..00000000
--- a/conf/evolutions/default/14.sql
+++ /dev/null
@@ -1,16 +0,0 @@
-# --- !Ups
--- Per (biosample, publication) pair: a Y and an mt haplogroup string plus free-text notes
-CREATE TABLE biosample_original_haplogroup
-(
- id SERIAL PRIMARY KEY,
- biosample_id INT REFERENCES biosample (id) ON DELETE CASCADE, -- nullable, row is removed with its biosample
- publication_id INT REFERENCES publication (id) ON DELETE CASCADE, -- nullable, row is removed with its publication
- original_y_haplogroup VARCHAR(255),
- original_mt_haplogroup VARCHAR(255),
- notes TEXT,
- UNIQUE (biosample_id, publication_id) -- one row per pair (rows with a NULL in either column are not deduplicated)
-);
-
--- !Downs
-
-DROP TABLE biosample_original_haplogroup;
\ No newline at end of file
diff --git a/conf/evolutions/default/15.sql b/conf/evolutions/default/15.sql
deleted file mode 100644
index 25ecf66f..00000000
--- a/conf/evolutions/default/15.sql
+++ /dev/null
@@ -1,32 +0,0 @@
-# --- !Ups
-
--- Checksums move into their own table: at most one row per (sequence file, algorithm)
-CREATE TABLE sequence_file_checksum (
- id SERIAL PRIMARY KEY,
- sequence_file_id INT NOT NULL,
- checksum VARCHAR(255) NOT NULL,
- algorithm VARCHAR(50) NOT NULL,
- verified_at TIMESTAMP NOT NULL,
- FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE,
- UNIQUE (sequence_file_id, algorithm)
-);
--- NOTE(review): the copy below fails if any file_md5 or created_at is NULL (both targets are NOT NULL)
--- Copy each file's existing MD5 across, reusing the file's created_at as verified_at
-INSERT INTO sequence_file_checksum (sequence_file_id, checksum, algorithm, verified_at)
-SELECT id, file_md5, 'MD5', created_at
-FROM sequence_file;
--- The old column is redundant once its values have been copied
-ALTER TABLE sequence_file DROP COLUMN file_md5;
-
-# --- !Downs
-
--- Re-add the column that Ups dropped (nullable, so files without an MD5 row stay NULL)
-ALTER TABLE sequence_file ADD COLUMN file_md5 VARCHAR(255);
-
--- Copy the MD5 values back. Checksums stored under any other algorithm are lost with the table
-UPDATE sequence_file sf
-SET file_md5 = sfc.checksum
-FROM sequence_file_checksum sfc
-WHERE sf.id = sfc.sequence_file_id AND sfc.algorithm = 'MD5';
-
-DROP TABLE sequence_file_checksum;
\ No newline at end of file
diff --git a/conf/evolutions/default/16.sql b/conf/evolutions/default/16.sql
deleted file mode 100644
index 98ac5213..00000000
--- a/conf/evolutions/default/16.sql
+++ /dev/null
@@ -1,50 +0,0 @@
-# --- !Ups
--- Add sample_type as a nullable column first so that existing rows can be backfilled
-ALTER TABLE public.biosample
- ADD COLUMN sample_type varchar(10);
-
--- Backfill: every pre-existing biosample is classified as 'Standard'
-UPDATE public.biosample
-SET sample_type = 'Standard';
-
--- Every row now has a value, so enforce NOT NULL and restrict the column to the four known types
-ALTER TABLE public.biosample
- ALTER COLUMN sample_type SET NOT NULL,
- ADD CONSTRAINT biosample_type_check CHECK (
- sample_type IN ('Standard', 'PGP', 'Citizen', 'Ancient')
- );
-
--- Add the remaining new columns (all nullable)
-ALTER TABLE public.biosample
- ADD COLUMN pgp_participant_id varchar(50),
- ADD COLUMN citizen_biosample_did varchar(255),
- ADD COLUMN source_platform varchar(100),
- ADD COLUMN date_range_start integer,
- ADD COLUMN date_range_end integer;
-
--- Conditional requirements: 'PGP' rows need pgp_participant_id, 'Citizen' rows need citizen_biosample_did
-ALTER TABLE public.biosample
- ADD CONSTRAINT pgp_participant_id_required
- CHECK (
- (sample_type != 'PGP') OR
- (sample_type = 'PGP' AND pgp_participant_id IS NOT NULL)
- );
-
-ALTER TABLE public.biosample
- ADD CONSTRAINT citizen_did_required
- CHECK (
- (sample_type != 'Citizen') OR
- (sample_type = 'Citizen' AND citizen_biosample_did IS NOT NULL)
- );
-
-# --- !Downs
-ALTER TABLE public.biosample -- constraints first, then the columns they reference, newest first
- DROP CONSTRAINT IF EXISTS citizen_did_required,
- DROP CONSTRAINT IF EXISTS pgp_participant_id_required,
- DROP CONSTRAINT IF EXISTS biosample_type_check,
- DROP COLUMN IF EXISTS date_range_end,
- DROP COLUMN IF EXISTS date_range_start,
- DROP COLUMN IF EXISTS source_platform,
- DROP COLUMN IF EXISTS citizen_biosample_did,
- DROP COLUMN IF EXISTS pgp_participant_id,
- DROP COLUMN IF EXISTS sample_type;
\ No newline at end of file
diff --git a/conf/evolutions/default/17.sql b/conf/evolutions/default/17.sql
deleted file mode 100644
index 4cd71fb0..00000000
--- a/conf/evolutions/default/17.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-# --- !Ups
---- Sequence used to generate accessions for citizen biosamples (starts at 1)
-CREATE SEQUENCE IF NOT EXISTS citizen_biosample_seq START 1;
-
-# --- !Downs
-DROP SEQUENCE IF EXISTS citizen_biosample_seq;
\ No newline at end of file
diff --git a/conf/evolutions/default/18.sql b/conf/evolutions/default/18.sql
deleted file mode 100644
index 146c94ed..00000000
--- a/conf/evolutions/default/18.sql
+++ /dev/null
@@ -1,104 +0,0 @@
--- !Ups
--- Moves the donor-level attributes (sex, location, dating, type, ids) from biosample to specimen_donor
--- Enum types that replace the text-typed sex and sample_type columns
-CREATE TYPE biological_sex AS ENUM ('male', 'female', 'intersex');
-CREATE TYPE biosample_type AS ENUM ('Standard', 'PGP', 'Citizen', 'Ancient');
-
--- Add the new columns to specimen_donor as nullable so that they can be filled from biosample
-ALTER TABLE specimen_donor
- ADD COLUMN sex biological_sex,
- ADD COLUMN geocoord geometry(Point, 4326),
- ADD COLUMN date_range_start integer,
- ADD COLUMN date_range_end integer,
- ADD COLUMN donor_type biosample_type,
- ADD COLUMN pgp_participant_id varchar(50),
- ADD COLUMN citizen_biosample_did varchar(255);
-
--- Copy the values from each biosample onto the specimen_donor row it points to
-UPDATE specimen_donor sd
-SET sex = CASE
- WHEN b.sex = 'male' THEN 'male'::biological_sex
- WHEN b.sex = 'female' THEN 'female'::biological_sex
- WHEN b.sex = 'intersex' THEN 'intersex'::biological_sex
- ELSE NULL -- any other stored value is not carried over
- END,
- geocoord = b.geocoord,
- date_range_start = b.date_range_start,
- date_range_end = b.date_range_end,
- donor_type = b.sample_type::biosample_type,
- pgp_participant_id = b.pgp_participant_id,
- citizen_biosample_did = b.citizen_biosample_did
-FROM biosample b -- NOTE(review): with several biosamples per donor, which one supplies the values is not defined
-WHERE b.specimen_donor_id = sd.id;
-
--- After the copy: make donor_type mandatory, default it to 'Standard', and re-create the two conditional checks
-ALTER TABLE specimen_donor
- ALTER COLUMN donor_type SET NOT NULL, -- NOTE(review): fails if a donor had no biosample row (donor_type still NULL)
- ALTER COLUMN donor_type SET DEFAULT 'Standard',
- ADD CONSTRAINT pgp_participant_id_required
- CHECK (donor_type != 'PGP' OR (donor_type = 'PGP' AND pgp_participant_id IS NOT NULL)),
- ADD CONSTRAINT citizen_did_required
- CHECK (donor_type != 'Citizen' OR (donor_type = 'Citizen' AND citizen_biosample_did IS NOT NULL));
-
--- Remove the migrated columns, and any constraints left on them, from biosample
-ALTER TABLE biosample
- DROP COLUMN sex,
- DROP COLUMN geocoord,
- DROP COLUMN sample_type,
- DROP COLUMN pgp_participant_id,
- DROP COLUMN citizen_biosample_did,
- DROP COLUMN date_range_start,
- DROP COLUMN date_range_end,
- DROP CONSTRAINT IF EXISTS pgp_participant_id_required,
- DROP CONSTRAINT IF EXISTS citizen_did_required,
- DROP CONSTRAINT IF EXISTS biosample_sex_check,
- DROP CONSTRAINT IF EXISTS biosample_type_check;
-
--- !Downs
-
--- Add the columns back to biosample (all nullable at this point)
-ALTER TABLE biosample
- ADD COLUMN sex varchar(15),
- ADD COLUMN geocoord geometry(Point, 4326),
- ADD COLUMN sample_type varchar(10), -- NOTE(review): stays nullable here, evolution 16 had made it NOT NULL
- ADD COLUMN pgp_participant_id varchar(50),
- ADD COLUMN citizen_biosample_did varchar(255),
- ADD COLUMN date_range_start integer,
- ADD COLUMN date_range_end integer;
-
--- Copy the values back from each specimen_donor to the biosamples that reference it
-UPDATE biosample b
-SET sex = sd.sex::text,
- geocoord = sd.geocoord,
- sample_type = sd.donor_type::text,
- pgp_participant_id = sd.pgp_participant_id,
- citizen_biosample_did = sd.citizen_biosample_did,
- date_range_start = sd.date_range_start,
- date_range_end = sd.date_range_end
-FROM specimen_donor sd
-WHERE b.specimen_donor_id = sd.id;
-
--- Re-create the value and conditional checks on biosample
-ALTER TABLE biosample
- ADD CONSTRAINT biosample_sex_check
- CHECK (sex IN ('male', 'female', 'intersex')),
- ADD CONSTRAINT biosample_type_check
- CHECK (sample_type IN ('Standard', 'PGP', 'Citizen', 'Ancient')),
- ADD CONSTRAINT pgp_participant_id_required
- CHECK (sample_type != 'PGP' OR (sample_type = 'PGP' AND pgp_participant_id IS NOT NULL)),
- ADD CONSTRAINT citizen_did_required
- CHECK (sample_type != 'Citizen' OR (sample_type = 'Citizen' AND citizen_biosample_did IS NOT NULL));
-
--- Remove the columns that Ups added to specimen_donor
-ALTER TABLE specimen_donor
- DROP COLUMN sex,
- DROP COLUMN geocoord,
- DROP COLUMN date_range_start,
- DROP COLUMN date_range_end,
- DROP COLUMN donor_type,
- DROP COLUMN pgp_participant_id,
- DROP COLUMN citizen_biosample_did;
-
--- Drop the enum types last: the specimen_donor columns removed above were the ones using them
-DROP TYPE biological_sex;
-DROP TYPE biosample_type;
\ No newline at end of file
diff --git a/conf/evolutions/default/19.sql b/conf/evolutions/default/19.sql
deleted file mode 100644
index edf1b484..00000000
--- a/conf/evolutions/default/19.sql
+++ /dev/null
@@ -1,95 +0,0 @@
--- !Ups
-
--- Drop the existing pangenome tables (CASCADE also removes dependent objects). Their rows are not migrated
-DROP TABLE IF EXISTS public.pangenome_alignment_coverage CASCADE;
-DROP TABLE IF EXISTS public.pangenome_alignment_metadata CASCADE;
-DROP TABLE IF EXISTS public.pangenome_variant_link CASCADE;
-DROP TABLE IF EXISTS public.pangenome_edge CASCADE;
-DROP TABLE IF EXISTS public.pangenome_path CASCADE;
-DROP TABLE IF EXISTS public.pangenome_node CASCADE;
-DROP TABLE IF EXISTS public.canonical_pangenome_variant CASCADE;
-DROP TABLE IF EXISTS public.pangenome_graph CASCADE;
-
--- Create the new simplified tables, referenced tables first (graph, then path and node, then the rest)
-CREATE TABLE public.pangenome_graph (
- id BIGSERIAL PRIMARY KEY,
- graph_name VARCHAR(255) NOT NULL,
- source_gfa_file VARCHAR(255),
- description TEXT,
- creation_date TIMESTAMP DEFAULT now() NOT NULL
-);
-
-CREATE TABLE public.pangenome_path (
- id BIGSERIAL PRIMARY KEY,
- graph_id BIGINT NOT NULL REFERENCES public.pangenome_graph(id),
- path_name VARCHAR(255) NOT NULL,
- is_reference BOOLEAN DEFAULT FALSE,
- length_bp BIGINT,
- description TEXT
-);
-
-CREATE TABLE public.pangenome_node (
- id BIGSERIAL PRIMARY KEY,
- graph_id BIGINT NOT NULL REFERENCES public.pangenome_graph(id),
- node_name VARCHAR(255) NOT NULL,
- sequence_length BIGINT
-);
-
-CREATE TABLE public.canonical_pangenome_variant (
- id BIGSERIAL PRIMARY KEY,
- pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph(id), -- NOTE(review): INTEGER referencing a BIGSERIAL key
- variant_type VARCHAR(50) NOT NULL,
- variant_nodes INTEGER[] NOT NULL,
- variant_edges INTEGER[] DEFAULT '{}'::INTEGER[] NOT NULL,
- reference_path_id INTEGER REFERENCES public.pangenome_path(id),
- reference_start_position INTEGER,
- reference_end_position INTEGER,
- reference_allele_sequence TEXT,
- alternate_allele_sequence TEXT,
- canonical_hash VARCHAR(255) NOT NULL UNIQUE,
- description TEXT,
- creation_date TIMESTAMP DEFAULT now() NOT NULL
-);
-
-CREATE TABLE public.pangenome_alignment_metadata (
- id BIGSERIAL PRIMARY KEY,
- sequence_file_id BIGINT NOT NULL REFERENCES public.sequence_file(id) ON DELETE CASCADE,
- pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph(id), -- NOTE(review): the INTEGER keys in this table also reference BIGSERIAL ids
- metric_level VARCHAR(50) NOT NULL CHECK (metric_level IN ('GRAPH_OVERALL', 'PATH', 'NODE', 'REGION')),
- pangenome_path_id INTEGER REFERENCES public.pangenome_path(id),
- pangenome_node_id INTEGER REFERENCES public.pangenome_node(id),
- region_start_node_id INTEGER REFERENCES public.pangenome_node(id),
- region_end_node_id INTEGER REFERENCES public.pangenome_node(id),
- region_name VARCHAR(255),
- region_length_bp BIGINT,
- metrics_date TIMESTAMP NOT NULL DEFAULT NOW(),
- analysis_tool VARCHAR(255) NOT NULL,
- analysis_tool_version VARCHAR(50),
- notes TEXT,
- metadata JSONB
-);
-
-CREATE TABLE public.pangenome_alignment_coverage (
- alignment_metadata_id BIGINT PRIMARY KEY REFERENCES public.pangenome_alignment_metadata(id) ON DELETE CASCADE, -- key shared with the metadata row, so at most one coverage row each
- mean_depth DOUBLE PRECISION,
- median_depth DOUBLE PRECISION,
- percent_coverage_at_1x DOUBLE PRECISION,
- percent_coverage_at_5x DOUBLE PRECISION,
- percent_coverage_at_10x DOUBLE PRECISION,
- percent_coverage_at_20x DOUBLE PRECISION,
- percent_coverage_at_30x DOUBLE PRECISION,
- bases_no_coverage BIGINT,
- bases_low_quality_mapping BIGINT,
- bases_callable BIGINT,
- mean_mapping_quality DOUBLE PRECISION
-);
-
--- !Downs
-
--- Only drops the tables created above, dependents first. The pre-evolution tables are NOT re-created
-DROP TABLE IF EXISTS public.pangenome_alignment_coverage CASCADE;
-DROP TABLE IF EXISTS public.pangenome_alignment_metadata CASCADE;
-DROP TABLE IF EXISTS public.canonical_pangenome_variant CASCADE;
-DROP TABLE IF EXISTS public.pangenome_node CASCADE;
-DROP TABLE IF EXISTS public.pangenome_path CASCADE;
-DROP TABLE IF EXISTS public.pangenome_graph CASCADE;
\ No newline at end of file
diff --git a/conf/evolutions/default/2.sql b/conf/evolutions/default/2.sql
deleted file mode 100644
index 242dfbb1..00000000
--- a/conf/evolutions/default/2.sql
+++ /dev/null
@@ -1,968 +0,0 @@
-# --- !Ups
---- Load GRCh37, GRCh38 and chm13v2.0 GenBank contigs
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (370, 'CM000663.2', 'chr1', 'GRCh38.p14', 248956422);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (371, 'CM000664.2', 'chr2', 'GRCh38.p14', 242193529);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (372, 'CM000665.2', 'chr3', 'GRCh38.p14', 198295559);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (373, 'CM000666.2', 'chr4', 'GRCh38.p14', 190214555);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (374, 'CM000667.2', 'chr5', 'GRCh38.p14', 181538259);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (375, 'CM000668.2', 'chr6', 'GRCh38.p14', 170805979);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (376, 'CM000669.2', 'chr7', 'GRCh38.p14', 159345973);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (377, 'CM000670.2', 'chr8', 'GRCh38.p14', 145138636);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (378, 'CM000671.2', 'chr9', 'GRCh38.p14', 138394717);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (379, 'CM000672.2', 'chr10', 'GRCh38.p14', 133797422);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (380, 'CM000673.2', 'chr11', 'GRCh38.p14', 135086622);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (381, 'CM000674.2', 'chr12', 'GRCh38.p14', 133275309);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (382, 'CM000675.2', 'chr13', 'GRCh38.p14', 114364328);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (383, 'CM000676.2', 'chr14', 'GRCh38.p14', 107043718);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (384, 'CM000677.2', 'chr15', 'GRCh38.p14', 101991189);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (385, 'CM000678.2', 'chr16', 'GRCh38.p14', 90338345);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (386, 'CM000679.2', 'chr17', 'GRCh38.p14', 83257441);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (387, 'CM000680.2', 'chr18', 'GRCh38.p14', 80373285);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (388, 'CM000681.2', 'chr19', 'GRCh38.p14', 58617616);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (389, 'CM000682.2', 'chr20', 'GRCh38.p14', 64444167);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (390, 'CM000683.2', 'chr21', 'GRCh38.p14', 46709983);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (391, 'CM000684.2', 'chr22', 'GRCh38.p14', 50818468);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (392, 'CM000685.2', 'chrX', 'GRCh38.p14', 156040895);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (393, 'CM000686.2', 'chrY', 'GRCh38.p14', 57227415);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (394, 'KI270706.1', 'chr1_KI270706v1_random', 'GRCh38.p14', 175055);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (395, 'KI270707.1', 'chr1_KI270707v1_random', 'GRCh38.p14', 32032);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (396, 'KI270708.1', 'chr1_KI270708v1_random', 'GRCh38.p14', 127682);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (397, 'KI270709.1', 'chr1_KI270709v1_random', 'GRCh38.p14', 66860);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (398, 'KI270710.1', 'chr1_KI270710v1_random', 'GRCh38.p14', 40176);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (399, 'KI270711.1', 'chr1_KI270711v1_random', 'GRCh38.p14', 42210);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (400, 'KI270712.1', 'chr1_KI270712v1_random', 'GRCh38.p14', 176043);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (401, 'KI270713.1', 'chr1_KI270713v1_random', 'GRCh38.p14', 40745);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (402, 'KI270714.1', 'chr1_KI270714v1_random', 'GRCh38.p14', 41717);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (403, 'KI270715.1', 'chr2_KI270715v1_random', 'GRCh38.p14', 161471);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (404, 'KI270716.1', 'chr2_KI270716v1_random', 'GRCh38.p14', 153799);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (406, 'GL000008.2', 'chr4_GL000008v2_random', 'GRCh38.p14', 209709);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (408, 'KI270717.1', 'chr9_KI270717v1_random', 'GRCh38.p14', 40062);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (409, 'KI270718.1', 'chr9_KI270718v1_random', 'GRCh38.p14', 38054);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (410, 'KI270719.1', 'chr9_KI270719v1_random', 'GRCh38.p14', 176845);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (411, 'KI270720.1', 'chr9_KI270720v1_random', 'GRCh38.p14', 39050);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (412, 'GL000009.2', 'chr14_GL000009v2_random', 'GRCh38.p14', 201709);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (415, 'KI270722.1', 'chr14_KI270722v1_random', 'GRCh38.p14', 194050);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (416, 'KI270723.1', 'chr14_KI270723v1_random', 'GRCh38.p14', 38115);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (417, 'KI270724.1', 'chr14_KI270724v1_random', 'GRCh38.p14', 39555);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (418, 'KI270725.1', 'chr14_KI270725v1_random', 'GRCh38.p14', 172810);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (419, 'KI270726.1', 'chr14_KI270726v1_random', 'GRCh38.p14', 43739);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (420, 'KI270727.1', 'chr15_KI270727v1_random', 'GRCh38.p14', 448248);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (421, 'KI270728.1', 'chr16_KI270728v1_random', 'GRCh38.p14', 1872759);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (422, 'GL000205.2', 'chr17_GL000205v2_random', 'GRCh38.p14', 185591);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (423, 'KI270729.1', 'chr17_KI270729v1_random', 'GRCh38.p14', 280839);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (424, 'KI270730.1', 'chr17_KI270730v1_random', 'GRCh38.p14', 112551);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (425, 'KI270731.1', 'chr22_KI270731v1_random', 'GRCh38.p14', 150754);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (426, 'KI270732.1', 'chr22_KI270732v1_random', 'GRCh38.p14', 41543);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (427, 'KI270733.1', 'chr22_KI270733v1_random', 'GRCh38.p14', 179772);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (428, 'KI270735.1', 'chr22_KI270735v1_random', 'GRCh38.p14', 42811);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (429, 'KI270736.1', 'chr22_KI270736v1_random', 'GRCh38.p14', 181920);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (430, 'KI270737.1', 'chr22_KI270737v1_random', 'GRCh38.p14', 103838);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (431, 'KI270738.1', 'chr22_KI270738v1_random', 'GRCh38.p14', 99375);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (432, 'KI270739.1', 'chr22_KI270739v1_random', 'GRCh38.p14', 73985);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (433, 'KI270740.1', 'chrY_KI270740v1_random', 'GRCh38.p14', 37240);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (437, 'GL000216.2', 'chrUn_GL000216v2', 'GRCh38.p14', 176608);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (443, 'KI270302.1', 'chrUn_KI270302v1', 'GRCh38.p14', 2274);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (444, 'KI270303.1', 'chrUn_KI270303v1', 'GRCh38.p14', 1942);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (445, 'KI270304.1', 'chrUn_KI270304v1', 'GRCh38.p14', 2165);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (446, 'KI270305.1', 'chrUn_KI270305v1', 'GRCh38.p14', 1472);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (447, 'KI270310.1', 'chrUn_KI270310v1', 'GRCh38.p14', 1201);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (448, 'KI270311.1', 'chrUn_KI270311v1', 'GRCh38.p14', 12399);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (449, 'KI270312.1', 'chrUn_KI270312v1', 'GRCh38.p14', 998);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (450, 'KI270315.1', 'chrUn_KI270315v1', 'GRCh38.p14', 2276);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (451, 'KI270316.1', 'chrUn_KI270316v1', 'GRCh38.p14', 1444);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (452, 'KI270317.1', 'chrUn_KI270317v1', 'GRCh38.p14', 37690);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (453, 'KI270320.1', 'chrUn_KI270320v1', 'GRCh38.p14', 4416);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (454, 'KI270322.1', 'chrUn_KI270322v1', 'GRCh38.p14', 21476);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (455, 'KI270329.1', 'chrUn_KI270329v1', 'GRCh38.p14', 1040);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (456, 'KI270330.1', 'chrUn_KI270330v1', 'GRCh38.p14', 1652);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (457, 'KI270333.1', 'chrUn_KI270333v1', 'GRCh38.p14', 2699);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (458, 'KI270334.1', 'chrUn_KI270334v1', 'GRCh38.p14', 1368);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (459, 'KI270335.1', 'chrUn_KI270335v1', 'GRCh38.p14', 1048);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (460, 'KI270336.1', 'chrUn_KI270336v1', 'GRCh38.p14', 1026);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (461, 'KI270337.1', 'chrUn_KI270337v1', 'GRCh38.p14', 1121);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (462, 'KI270338.1', 'chrUn_KI270338v1', 'GRCh38.p14', 1428);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (463, 'KI270340.1', 'chrUn_KI270340v1', 'GRCh38.p14', 1428);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (464, 'KI270362.1', 'chrUn_KI270362v1', 'GRCh38.p14', 3530);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (465, 'KI270363.1', 'chrUn_KI270363v1', 'GRCh38.p14', 1803);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (466, 'KI270364.1', 'chrUn_KI270364v1', 'GRCh38.p14', 2855);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (467, 'KI270366.1', 'chrUn_KI270366v1', 'GRCh38.p14', 8320);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (468, 'KI270371.1', 'chrUn_KI270371v1', 'GRCh38.p14', 2805);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (469, 'KI270372.1', 'chrUn_KI270372v1', 'GRCh38.p14', 1650);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (470, 'KI270373.1', 'chrUn_KI270373v1', 'GRCh38.p14', 1451);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (471, 'KI270374.1', 'chrUn_KI270374v1', 'GRCh38.p14', 2656);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (472, 'KI270375.1', 'chrUn_KI270375v1', 'GRCh38.p14', 2378);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (473, 'KI270376.1', 'chrUn_KI270376v1', 'GRCh38.p14', 1136);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (474, 'KI270378.1', 'chrUn_KI270378v1', 'GRCh38.p14', 1048);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (475, 'KI270379.1', 'chrUn_KI270379v1', 'GRCh38.p14', 1045);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (476, 'KI270381.1', 'chrUn_KI270381v1', 'GRCh38.p14', 1930);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (477, 'KI270382.1', 'chrUn_KI270382v1', 'GRCh38.p14', 4215);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (478, 'KI270383.1', 'chrUn_KI270383v1', 'GRCh38.p14', 1750);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (479, 'KI270384.1', 'chrUn_KI270384v1', 'GRCh38.p14', 1658);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (480, 'KI270385.1', 'chrUn_KI270385v1', 'GRCh38.p14', 990);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (481, 'KI270386.1', 'chrUn_KI270386v1', 'GRCh38.p14', 1788);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (482, 'KI270387.1', 'chrUn_KI270387v1', 'GRCh38.p14', 1537);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (483, 'KI270388.1', 'chrUn_KI270388v1', 'GRCh38.p14', 1216);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (484, 'KI270389.1', 'chrUn_KI270389v1', 'GRCh38.p14', 1298);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (485, 'KI270390.1', 'chrUn_KI270390v1', 'GRCh38.p14', 2387);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (486, 'KI270391.1', 'chrUn_KI270391v1', 'GRCh38.p14', 1484);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (487, 'KI270392.1', 'chrUn_KI270392v1', 'GRCh38.p14', 971);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (488, 'KI270393.1', 'chrUn_KI270393v1', 'GRCh38.p14', 1308);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (489, 'KI270394.1', 'chrUn_KI270394v1', 'GRCh38.p14', 970);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (490, 'KI270395.1', 'chrUn_KI270395v1', 'GRCh38.p14', 1143);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (491, 'KI270396.1', 'chrUn_KI270396v1', 'GRCh38.p14', 1880);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (492, 'KI270411.1', 'chrUn_KI270411v1', 'GRCh38.p14', 2646);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (493, 'KI270412.1', 'chrUn_KI270412v1', 'GRCh38.p14', 1179);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (494, 'KI270414.1', 'chrUn_KI270414v1', 'GRCh38.p14', 2489);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (495, 'KI270417.1', 'chrUn_KI270417v1', 'GRCh38.p14', 2043);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (496, 'KI270418.1', 'chrUn_KI270418v1', 'GRCh38.p14', 2145);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (497, 'KI270419.1', 'chrUn_KI270419v1', 'GRCh38.p14', 1029);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (498, 'KI270420.1', 'chrUn_KI270420v1', 'GRCh38.p14', 2321);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (499, 'KI270422.1', 'chrUn_KI270422v1', 'GRCh38.p14', 1445);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (500, 'KI270423.1', 'chrUn_KI270423v1', 'GRCh38.p14', 981);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (501, 'KI270424.1', 'chrUn_KI270424v1', 'GRCh38.p14', 2140);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (502, 'KI270425.1', 'chrUn_KI270425v1', 'GRCh38.p14', 1884);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (503, 'KI270429.1', 'chrUn_KI270429v1', 'GRCh38.p14', 1361);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (504, 'KI270435.1', 'chrUn_KI270435v1', 'GRCh38.p14', 92983);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (505, 'KI270438.1', 'chrUn_KI270438v1', 'GRCh38.p14', 112505);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (506, 'KI270442.1', 'chrUn_KI270442v1', 'GRCh38.p14', 392061);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (507, 'KI270448.1', 'chrUn_KI270448v1', 'GRCh38.p14', 7992);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (508, 'KI270465.1', 'chrUn_KI270465v1', 'GRCh38.p14', 1774);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (509, 'KI270466.1', 'chrUn_KI270466v1', 'GRCh38.p14', 1233);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (510, 'KI270467.1', 'chrUn_KI270467v1', 'GRCh38.p14', 3920);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (511, 'KI270468.1', 'chrUn_KI270468v1', 'GRCh38.p14', 4055);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (512, 'KI270507.1', 'chrUn_KI270507v1', 'GRCh38.p14', 5353);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (513, 'KI270508.1', 'chrUn_KI270508v1', 'GRCh38.p14', 1951);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (514, 'KI270509.1', 'chrUn_KI270509v1', 'GRCh38.p14', 2318);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (515, 'KI270510.1', 'chrUn_KI270510v1', 'GRCh38.p14', 2415);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (516, 'KI270511.1', 'chrUn_KI270511v1', 'GRCh38.p14', 8127);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (517, 'KI270512.1', 'chrUn_KI270512v1', 'GRCh38.p14', 22689);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (518, 'KI270515.1', 'chrUn_KI270515v1', 'GRCh38.p14', 6361);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (519, 'KI270516.1', 'chrUn_KI270516v1', 'GRCh38.p14', 1300);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (520, 'KI270517.1', 'chrUn_KI270517v1', 'GRCh38.p14', 3253);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (521, 'KI270518.1', 'chrUn_KI270518v1', 'GRCh38.p14', 2186);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (522, 'KI270519.1', 'chrUn_KI270519v1', 'GRCh38.p14', 138126);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (523, 'KI270521.1', 'chrUn_KI270521v1', 'GRCh38.p14', 7642);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (524, 'KI270522.1', 'chrUn_KI270522v1', 'GRCh38.p14', 5674);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (525, 'KI270528.1', 'chrUn_KI270528v1', 'GRCh38.p14', 2983);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (526, 'KI270529.1', 'chrUn_KI270529v1', 'GRCh38.p14', 1899);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (527, 'KI270530.1', 'chrUn_KI270530v1', 'GRCh38.p14', 2168);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (528, 'KI270538.1', 'chrUn_KI270538v1', 'GRCh38.p14', 91309);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (529, 'KI270539.1', 'chrUn_KI270539v1', 'GRCh38.p14', 993);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (530, 'KI270544.1', 'chrUn_KI270544v1', 'GRCh38.p14', 1202);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (531, 'KI270548.1', 'chrUn_KI270548v1', 'GRCh38.p14', 1599);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (532, 'KI270579.1', 'chrUn_KI270579v1', 'GRCh38.p14', 31033);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (533, 'KI270580.1', 'chrUn_KI270580v1', 'GRCh38.p14', 1553);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (534, 'KI270581.1', 'chrUn_KI270581v1', 'GRCh38.p14', 7046);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (535, 'KI270582.1', 'chrUn_KI270582v1', 'GRCh38.p14', 6504);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (536, 'KI270583.1', 'chrUn_KI270583v1', 'GRCh38.p14', 1400);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (537, 'KI270584.1', 'chrUn_KI270584v1', 'GRCh38.p14', 4513);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (538, 'KI270587.1', 'chrUn_KI270587v1', 'GRCh38.p14', 2969);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (539, 'KI270588.1', 'chrUn_KI270588v1', 'GRCh38.p14', 6158);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (540, 'KI270589.1', 'chrUn_KI270589v1', 'GRCh38.p14', 44474);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (541, 'KI270590.1', 'chrUn_KI270590v1', 'GRCh38.p14', 4685);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (542, 'KI270591.1', 'chrUn_KI270591v1', 'GRCh38.p14', 5796);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (543, 'KI270593.1', 'chrUn_KI270593v1', 'GRCh38.p14', 3041);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (544, 'KI270741.1', 'chrUn_KI270741v1', 'GRCh38.p14', 157432);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (545, 'KI270742.1', 'chrUn_KI270742v1', 'GRCh38.p14', 186739);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (546, 'KI270743.1', 'chrUn_KI270743v1', 'GRCh38.p14', 210658);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (547, 'KI270744.1', 'chrUn_KI270744v1', 'GRCh38.p14', 168472);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (548, 'KI270745.1', 'chrUn_KI270745v1', 'GRCh38.p14', 41891);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (549, 'KI270746.1', 'chrUn_KI270746v1', 'GRCh38.p14', 66486);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (550, 'KI270747.1', 'chrUn_KI270747v1', 'GRCh38.p14', 198735);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (551, 'KI270748.1', 'chrUn_KI270748v1', 'GRCh38.p14', 93321);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (552, 'KI270749.1', 'chrUn_KI270749v1', 'GRCh38.p14', 158759);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (553, 'KI270750.1', 'chrUn_KI270750v1', 'GRCh38.p14', 148850);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (554, 'KI270751.1', 'chrUn_KI270751v1', 'GRCh38.p14', 150742);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (555, 'KI270753.1', 'chrUn_KI270753v1', 'GRCh38.p14', 62944);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (556, 'KI270754.1', 'chrUn_KI270754v1', 'GRCh38.p14', 40191);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (557, 'KI270755.1', 'chrUn_KI270755v1', 'GRCh38.p14', 36723);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (558, 'KI270756.1', 'chrUn_KI270756v1', 'GRCh38.p14', 79590);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (559, 'KI270757.1', 'chrUn_KI270757v1', 'GRCh38.p14', 71251);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (560, 'KN196472.1', 'chr1_KN196472v1_fix', 'GRCh38.p14', 186494);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (561, 'KN196473.1', 'chr1_KN196473v1_fix', 'GRCh38.p14', 166200);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (562, 'KN196474.1', 'chr1_KN196474v1_fix', 'GRCh38.p14', 122022);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (563, 'KN538360.1', 'chr1_KN538360v1_fix', 'GRCh38.p14', 460100);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (564, 'KN538361.1', 'chr1_KN538361v1_fix', 'GRCh38.p14', 305542);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (565, 'KQ031383.1', 'chr1_KQ031383v1_fix', 'GRCh38.p14', 467143);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (566, 'KZ208906.1', 'chr1_KZ208906v1_fix', 'GRCh38.p14', 330031);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (567, 'KZ559100.1', 'chr1_KZ559100v1_fix', 'GRCh38.p14', 44955);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (568, 'MU273333.1', 'chr1_MU273333v1_fix', 'GRCh38.p14', 1572686);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (569, 'MU273334.1', 'chr1_MU273334v1_fix', 'GRCh38.p14', 210426);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (570, 'MU273335.1', 'chr1_MU273335v1_fix', 'GRCh38.p14', 211934);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (571, 'MU273336.1', 'chr1_MU273336v1_fix', 'GRCh38.p14', 250447);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (572, 'KQ458382.1', 'chr1_KQ458382v1_alt', 'GRCh38.p14', 141019);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (573, 'KQ458383.1', 'chr1_KQ458383v1_alt', 'GRCh38.p14', 349938);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (574, 'KQ458384.1', 'chr1_KQ458384v1_alt', 'GRCh38.p14', 212205);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (575, 'KQ983255.1', 'chr1_KQ983255v1_alt', 'GRCh38.p14', 278659);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (576, 'KV880763.1', 'chr1_KV880763v1_alt', 'GRCh38.p14', 551020);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (577, 'KZ208904.1', 'chr1_KZ208904v1_alt', 'GRCh38.p14', 166136);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (578, 'KZ208905.1', 'chr1_KZ208905v1_alt', 'GRCh38.p14', 140355);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (579, 'MU273330.1', 'chr1_MU273330v1_alt', 'GRCh38.p14', 516764);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (580, 'MU273331.1', 'chr1_MU273331v1_alt', 'GRCh38.p14', 847441);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (581, 'MU273332.1', 'chr1_MU273332v1_alt', 'GRCh38.p14', 335159);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (582, 'KN538362.1', 'chr2_KN538362v1_fix', 'GRCh38.p14', 208149);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (583, 'KN538363.1', 'chr2_KN538363v1_fix', 'GRCh38.p14', 365499);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (584, 'KQ031384.1', 'chr2_KQ031384v1_fix', 'GRCh38.p14', 481245);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (585, 'ML143341.1', 'chr2_ML143341v1_fix', 'GRCh38.p14', 145975);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (586, 'ML143342.1', 'chr2_ML143342v1_fix', 'GRCh38.p14', 84043);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (587, 'MU273341.1', 'chr2_MU273341v1_fix', 'GRCh38.p14', 120381);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (588, 'MU273342.1', 'chr2_MU273342v1_fix', 'GRCh38.p14', 955087);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (589, 'MU273343.1', 'chr2_MU273343v1_fix', 'GRCh38.p14', 489404);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (590, 'MU273344.1', 'chr2_MU273344v1_fix', 'GRCh38.p14', 244725);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (591, 'MU273345.1', 'chr2_MU273345v1_fix', 'GRCh38.p14', 174385);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (592, 'KQ983256.1', 'chr2_KQ983256v1_alt', 'GRCh38.p14', 535088);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (593, 'KZ208907.1', 'chr2_KZ208907v1_alt', 'GRCh38.p14', 181658);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (594, 'KZ208908.1', 'chr2_KZ208908v1_alt', 'GRCh38.p14', 140361);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (595, 'MU273337.1', 'chr2_MU273337v1_alt', 'GRCh38.p14', 431782);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (596, 'MU273338.1', 'chr2_MU273338v1_alt', 'GRCh38.p14', 535251);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (597, 'MU273339.1', 'chr2_MU273339v1_alt', 'GRCh38.p14', 500581);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (598, 'MU273340.1', 'chr2_MU273340v1_alt', 'GRCh38.p14', 284971);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (599, 'KN196475.1', 'chr3_KN196475v1_fix', 'GRCh38.p14', 451168);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (600, 'KN196476.1', 'chr3_KN196476v1_fix', 'GRCh38.p14', 305979);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (601, 'KN538364.1', 'chr3_KN538364v1_fix', 'GRCh38.p14', 415308);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (602, 'KQ031385.1', 'chr3_KQ031385v1_fix', 'GRCh38.p14', 373699);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (603, 'KQ031386.1', 'chr3_KQ031386v1_fix', 'GRCh38.p14', 165718);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (604, 'KV766192.1', 'chr3_KV766192v1_fix', 'GRCh38.p14', 411654);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (605, 'KZ559104.1', 'chr3_KZ559104v1_fix', 'GRCh38.p14', 105527);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (606, 'MU273346.1', 'chr3_MU273346v1_fix', 'GRCh38.p14', 469342);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (607, 'MU273347.1', 'chr3_MU273347v1_fix', 'GRCh38.p14', 301310);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (608, 'MU273348.1', 'chr3_MU273348v1_fix', 'GRCh38.p14', 475876);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (609, 'KZ208909.1', 'chr3_KZ208909v1_alt', 'GRCh38.p14', 175849);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (610, 'KZ559101.1', 'chr3_KZ559101v1_alt', 'GRCh38.p14', 164041);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (611, 'KZ559102.1', 'chr3_KZ559102v1_alt', 'GRCh38.p14', 197752);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (612, 'KZ559103.1', 'chr3_KZ559103v1_alt', 'GRCh38.p14', 302885);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (613, 'KZ559105.1', 'chr3_KZ559105v1_alt', 'GRCh38.p14', 195063);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (614, 'ML143343.1', 'chr3_ML143343v1_alt', 'GRCh38.p14', 215443);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (615, 'KQ983257.1', 'chr4_KQ983257v1_fix', 'GRCh38.p14', 230434);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (616, 'ML143344.1', 'chr4_ML143344v1_fix', 'GRCh38.p14', 235734);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (617, 'ML143345.1', 'chr4_ML143345v1_fix', 'GRCh38.p14', 341066);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (618, 'ML143346.1', 'chr4_ML143346v1_fix', 'GRCh38.p14', 53476);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (619, 'ML143347.1', 'chr4_ML143347v1_fix', 'GRCh38.p14', 176674);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (620, 'ML143348.1', 'chr4_ML143348v1_fix', 'GRCh38.p14', 125549);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (621, 'ML143349.1', 'chr4_ML143349v1_fix', 'GRCh38.p14', 276109);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (622, 'MU273350.1', 'chr4_MU273350v1_fix', 'GRCh38.p14', 113364);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (623, 'MU273351.1', 'chr4_MU273351v1_fix', 'GRCh38.p14', 205691);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (624, 'KQ090013.1', 'chr4_KQ090013v1_alt', 'GRCh38.p14', 90922);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (625, 'KQ090014.1', 'chr4_KQ090014v1_alt', 'GRCh38.p14', 163749);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (626, 'KQ090015.1', 'chr4_KQ090015v1_alt', 'GRCh38.p14', 236512);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (627, 'KQ983258.1', 'chr4_KQ983258v1_alt', 'GRCh38.p14', 205407);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (628, 'KV766193.1', 'chr4_KV766193v1_alt', 'GRCh38.p14', 420675);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (629, 'MU273349.1', 'chr4_MU273349v1_alt', 'GRCh38.p14', 308682);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (630, 'KV575244.1', 'chr5_KV575244v1_fix', 'GRCh38.p14', 673059);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (631, 'ML143350.1', 'chr5_ML143350v1_fix', 'GRCh38.p14', 89956);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (632, 'MU273352.1', 'chr5_MU273352v1_fix', 'GRCh38.p14', 34400);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (633, 'MU273353.1', 'chr5_MU273353v1_fix', 'GRCh38.p14', 208405);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (634, 'MU273354.1', 'chr5_MU273354v1_fix', 'GRCh38.p14', 2101585);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (635, 'MU273355.1', 'chr5_MU273355v1_fix', 'GRCh38.p14', 508332);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (636, 'KN196477.1', 'chr5_KN196477v1_alt', 'GRCh38.p14', 139087);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (637, 'KV575243.1', 'chr5_KV575243v1_alt', 'GRCh38.p14', 362221);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (638, 'KZ208910.1', 'chr5_KZ208910v1_alt', 'GRCh38.p14', 135987);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (639, 'MU273356.1', 'chr5_MU273356v1_alt', 'GRCh38.p14', 302485);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (640, 'KN196478.1', 'chr6_KN196478v1_fix', 'GRCh38.p14', 268330);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (641, 'KQ031387.1', 'chr6_KQ031387v1_fix', 'GRCh38.p14', 320750);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (642, 'KQ090016.1', 'chr6_KQ090016v1_fix', 'GRCh38.p14', 245716);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (643, 'KV766194.1', 'chr6_KV766194v1_fix', 'GRCh38.p14', 139427);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (644, 'KZ208911.1', 'chr6_KZ208911v1_fix', 'GRCh38.p14', 242796);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (645, 'ML143351.1', 'chr6_ML143351v1_fix', 'GRCh38.p14', 73265);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (646, 'KQ090017.1', 'chr6_KQ090017v1_alt', 'GRCh38.p14', 82315);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (647, 'MU273357.1', 'chr6_MU273357v1_alt', 'GRCh38.p14', 383128);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (648, 'KQ031388.1', 'chr7_KQ031388v1_fix', 'GRCh38.p14', 179932);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (649, 'KV880764.1', 'chr7_KV880764v1_fix', 'GRCh38.p14', 142129);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1135, 'CP068277.2', 'chr1', 'T2T-CHM13v2.0', 248387328);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1136, 'CP068276.2', 'chr2', 'T2T-CHM13v2.0', 242696752);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1137, 'CP068275.2', 'chr3', 'T2T-CHM13v2.0', 201105948);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1138, 'CP068274.2', 'chr4', 'T2T-CHM13v2.0', 193574945);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1139, 'CP068273.2', 'chr5', 'T2T-CHM13v2.0', 182045439);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1140, 'CP068272.2', 'chr6', 'T2T-CHM13v2.0', 172126628);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1141, 'CP068271.2', 'chr7', 'T2T-CHM13v2.0', 160567428);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1142, 'CP068270.2', 'chr8', 'T2T-CHM13v2.0', 146259331);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1143, 'CP068269.2', 'chr9', 'T2T-CHM13v2.0', 150617247);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1144, 'CP068268.2', 'chr10', 'T2T-CHM13v2.0', 134758134);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1145, 'CP068267.2', 'chr11', 'T2T-CHM13v2.0', 135127769);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1146, 'CP068266.2', 'chr12', 'T2T-CHM13v2.0', 133324548);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1147, 'CP068265.2', 'chr13', 'T2T-CHM13v2.0', 113566686);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1148, 'CP068264.2', 'chr14', 'T2T-CHM13v2.0', 101161492);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1149, 'CP068263.2', 'chr15', 'T2T-CHM13v2.0', 99753195);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (222, 'JH159147.1', null, 'GRCh37.p13', 70345);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (223, 'JH159148.1', null, 'GRCh37.p13', 88070);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (224, 'GL383567.1', null, 'GRCh37.p13', 289831);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (225, 'GL383568.1', null, 'GRCh37.p13', 104552);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (226, 'GL383569.1', null, 'GRCh37.p13', 167950);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (227, 'GL383570.1', null, 'GRCh37.p13', 164789);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (228, 'GL383571.1', null, 'GRCh37.p13', 198278);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (229, 'GL383572.1', null, 'GRCh37.p13', 159547);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (230, 'GL582977.2', null, 'GRCh37.p13', 580393);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (231, 'JH159149.1', null, 'GRCh37.p13', 245473);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (232, 'KB021647.1', null, 'GRCh37.p13', 1058686);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (233, 'KE332505.1', null, 'GRCh37.p13', 579598);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (234, 'GL383573.1', null, 'GRCh37.p13', 385657);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (235, 'GL383574.1', null, 'GRCh37.p13', 155864);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (236, 'GL383575.2', null, 'GRCh37.p13', 170222);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (237, 'GL383576.1', null, 'GRCh37.p13', 188024);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (238, 'GL949746.1', null, 'GRCh37.p13', 987716);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (239, 'GL949747.1', null, 'GRCh37.p13', 729519);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (240, 'GL949748.1', null, 'GRCh37.p13', 1064303);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (241, 'GL949749.1', null, 'GRCh37.p13', 1091840);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (242, 'GL949750.1', null, 'GRCh37.p13', 1066389);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (243, 'GL949751.1', null, 'GRCh37.p13', 1002682);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (244, 'GL949752.1', null, 'GRCh37.p13', 987100);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (245, 'GL949753.1', null, 'GRCh37.p13', 796478);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (246, 'GL582979.2', null, 'GRCh37.p13', 179899);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (247, 'JH720448.1', null, 'GRCh37.p13', 70483);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (248, 'KB663608.1', null, 'GRCh37.p13', 283551);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (249, 'GL383577.1', null, 'GRCh37.p13', 128385);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (650, 'KV880765.1', 'chr7_KV880765v1_fix', 'GRCh38.p14', 468267);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (651, 'KZ208912.1', 'chr7_KZ208912v1_fix', 'GRCh38.p14', 589656);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (652, 'ML143352.1', 'chr7_ML143352v1_fix', 'GRCh38.p14', 254759);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (653, 'KZ208913.1', 'chr7_KZ208913v1_alt', 'GRCh38.p14', 680662);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (654, 'KZ559106.1', 'chr7_KZ559106v1_alt', 'GRCh38.p14', 172555);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1150, 'CP068262.2', 'chr16', 'T2T-CHM13v2.0', 96330374);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1151, 'CP068261.2', 'chr17', 'T2T-CHM13v2.0', 84276897);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1152, 'CP068260.2', 'chr18', 'T2T-CHM13v2.0', 80542538);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1153, 'CP068259.2', 'chr19', 'T2T-CHM13v2.0', 61707364);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1154, 'CP068258.2', 'chr20', 'T2T-CHM13v2.0', 66210255);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1155, 'CP068257.2', 'chr21', 'T2T-CHM13v2.0', 45090682);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1156, 'CP068256.2', 'chr22', 'T2T-CHM13v2.0', 51324926);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1157, 'CP068255.2', 'chrX', 'T2T-CHM13v2.0', 154259566);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1158, 'CP086569.2', 'chrY', 'T2T-CHM13v2.0', 62460029);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1159, 'CP068254.1', 'chrM', 'T2T-CHM13v2.0', 16569);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1, 'CM000663.1', 'chr1', 'GRCh37.p13', 249250621);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (2, 'CM000664.1', 'chr2', 'GRCh37.p13', 243199373);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (3, 'CM000665.1', 'chr3', 'GRCh37.p13', 198022430);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (4, 'CM000666.1', 'chr4', 'GRCh37.p13', 191154276);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (5, 'CM000667.1', 'chr5', 'GRCh37.p13', 180915260);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (6, 'CM000668.1', 'chr6', 'GRCh37.p13', 171115067);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (7, 'CM000669.1', 'chr7', 'GRCh37.p13', 159138663);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (8, 'CM000670.1', 'chr8', 'GRCh37.p13', 146364022);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (9, 'CM000671.1', 'chr9', 'GRCh37.p13', 141213431);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (10, 'CM000672.1', 'chr10', 'GRCh37.p13', 135534747);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (11, 'CM000673.1', 'chr11', 'GRCh37.p13', 135006516);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (12, 'CM000674.1', 'chr12', 'GRCh37.p13', 133851895);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (13, 'CM000675.1', 'chr13', 'GRCh37.p13', 115169878);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (14, 'CM000676.1', 'chr14', 'GRCh37.p13', 107349540);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (15, 'CM000677.1', 'chr15', 'GRCh37.p13', 102531392);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (16, 'CM000678.1', 'chr16', 'GRCh37.p13', 90354753);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (17, 'CM000679.1', 'chr17', 'GRCh37.p13', 81195210);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (18, 'CM000680.1', 'chr18', 'GRCh37.p13', 78077248);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (19, 'CM000681.1', 'chr19', 'GRCh37.p13', 59128983);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (20, 'CM000682.1', 'chr20', 'GRCh37.p13', 63025520);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (21, 'CM000683.1', 'chr21', 'GRCh37.p13', 48129895);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (22, 'CM000684.1', 'chr22', 'GRCh37.p13', 51304566);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (23, 'CM000685.1', 'chrX', 'GRCh37.p13', 155270560);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (24, 'CM000686.1', 'chrY', 'GRCh37.p13', 59373566);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (25, 'GL000191.1', 'chr1_gl000191_random', 'GRCh37.p13', 106433);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (26, 'GL000192.1', 'chr1_gl000192_random', 'GRCh37.p13', 547496);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (27, 'GL000193.1', 'chr4_gl000193_random', 'GRCh37.p13', 189789);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (28, 'GL000194.1', 'chr4_gl000194_random', 'GRCh37.p13', 191469);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (29, 'GL000195.1', 'chr7_gl000195_random', 'GRCh37.p13', 182896);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (30, 'GL000196.1', 'chr8_gl000196_random', 'GRCh37.p13', 38914);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (31, 'GL000197.1', 'chr8_gl000197_random', 'GRCh37.p13', 37175);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (32, 'GL000198.1', 'chr9_gl000198_random', 'GRCh37.p13', 90085);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (33, 'GL000199.1', 'chr9_gl000199_random', 'GRCh37.p13', 169874);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (34, 'GL000200.1', 'chr9_gl000200_random', 'GRCh37.p13', 187035);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (35, 'GL000201.1', 'chr9_gl000201_random', 'GRCh37.p13', 36148);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (36, 'GL000202.1', 'chr11_gl000202_random', 'GRCh37.p13', 40103);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (286, 'JH806603.1', null, 'GRCh37.p13', 182949);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (37, 'GL000203.1', 'chr17_gl000203_random', 'GRCh37.p13', 37498);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (38, 'GL000204.1', 'chr17_gl000204_random', 'GRCh37.p13', 81310);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (39, 'GL000205.1', 'chr17_gl000205_random', 'GRCh37.p13', 174588);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (40, 'GL000206.1', 'chr17_gl000206_random', 'GRCh37.p13', 41001);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (41, 'GL000207.1', 'chr18_gl000207_random', 'GRCh37.p13', 4262);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (42, 'GL000208.1', 'chr19_gl000208_random', 'GRCh37.p13', 92689);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (43, 'GL000209.1', 'chr19_gl000209_random', 'GRCh37.p13', 159169);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (44, 'GL000210.1', 'chr21_gl000210_random', 'GRCh37.p13', 27682);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (287, 'KB021648.1', null, 'GRCh37.p13', 469972);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (45, 'GL000211.1', 'chrUn_gl000211', 'GRCh37.p13', 166566);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (46, 'GL000212.1', 'chrUn_gl000212', 'GRCh37.p13', 186858);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (47, 'GL000213.1', 'chrUn_gl000213', 'GRCh37.p13', 164239);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (48, 'GL000214.1', 'chrUn_gl000214', 'GRCh37.p13', 137718);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (49, 'GL000215.1', 'chrUn_gl000215', 'GRCh37.p13', 172545);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (50, 'GL000216.1', 'chrUn_gl000216', 'GRCh37.p13', 172294);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (51, 'GL000217.1', 'chrUn_gl000217', 'GRCh37.p13', 172149);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (52, 'GL000218.1', 'chrUn_gl000218', 'GRCh37.p13', 161147);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (53, 'GL000219.1', 'chrUn_gl000219', 'GRCh37.p13', 179198);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (54, 'GL000220.1', 'chrUn_gl000220', 'GRCh37.p13', 161802);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (55, 'GL000221.1', 'chrUn_gl000221', 'GRCh37.p13', 155397);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (56, 'GL000222.1', 'chrUn_gl000222', 'GRCh37.p13', 186861);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (57, 'GL000223.1', 'chrUn_gl000223', 'GRCh37.p13', 180455);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (58, 'GL000224.1', 'chrUn_gl000224', 'GRCh37.p13', 179693);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (59, 'GL000225.1', 'chrUn_gl000225', 'GRCh37.p13', 211173);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (60, 'GL000226.1', 'chrUn_gl000226', 'GRCh37.p13', 15008);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (61, 'GL000227.1', 'chrUn_gl000227', 'GRCh37.p13', 128374);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (62, 'GL000228.1', 'chrUn_gl000228', 'GRCh37.p13', 129120);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (63, 'GL000229.1', 'chrUn_gl000229', 'GRCh37.p13', 19913);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (64, 'GL000230.1', 'chrUn_gl000230', 'GRCh37.p13', 43691);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (65, 'GL000231.1', 'chrUn_gl000231', 'GRCh37.p13', 27386);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (66, 'GL000232.1', 'chrUn_gl000232', 'GRCh37.p13', 40652);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (67, 'GL000233.1', 'chrUn_gl000233', 'GRCh37.p13', 45941);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (68, 'GL000234.1', 'chrUn_gl000234', 'GRCh37.p13', 40531);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (69, 'GL000235.1', 'chrUn_gl000235', 'GRCh37.p13', 34474);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (70, 'GL000236.1', 'chrUn_gl000236', 'GRCh37.p13', 41934);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (71, 'GL000237.1', 'chrUn_gl000237', 'GRCh37.p13', 45867);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (72, 'GL000238.1', 'chrUn_gl000238', 'GRCh37.p13', 39939);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (73, 'GL000239.1', 'chrUn_gl000239', 'GRCh37.p13', 33824);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (74, 'GL000240.1', 'chrUn_gl000240', 'GRCh37.p13', 41933);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (75, 'GL000241.1', 'chrUn_gl000241', 'GRCh37.p13', 42152);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (76, 'GL000242.1', 'chrUn_gl000242', 'GRCh37.p13', 43523);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (77, 'GL000243.1', 'chrUn_gl000243', 'GRCh37.p13', 43341);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (78, 'GL000244.1', 'chrUn_gl000244', 'GRCh37.p13', 39929);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (79, 'GL000245.1', 'chrUn_gl000245', 'GRCh37.p13', 36651);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (80, 'GL000246.1', 'chrUn_gl000246', 'GRCh37.p13', 38154);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (81, 'GL000247.1', 'chrUn_gl000247', 'GRCh37.p13', 36422);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (82, 'GL000248.1', 'chrUn_gl000248', 'GRCh37.p13', 39786);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (83, 'GL000249.1', 'chrUn_gl000249', 'GRCh37.p13', 38502);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (84, 'GL383516.1', null, 'GRCh37.p13', 49316);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (85, 'GL383517.1', null, 'GRCh37.p13', 49352);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (86, 'GL949741.1', null, 'GRCh37.p13', 151551);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (87, 'JH636052.4', null, 'GRCh37.p13', 7283150);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (88, 'JH636053.3', null, 'GRCh37.p13', 1676126);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (89, 'JH636054.1', null, 'GRCh37.p13', 758378);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (90, 'JH806573.1', null, 'GRCh37.p13', 24680);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (91, 'JH806574.2', null, 'GRCh37.p13', 22982);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (92, 'JH806575.1', null, 'GRCh37.p13', 47409);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (93, 'GL383518.1', null, 'GRCh37.p13', 182439);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (94, 'GL383519.1', null, 'GRCh37.p13', 110268);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (95, 'GL383520.1', null, 'GRCh37.p13', 366579);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (96, 'GL877870.2', null, 'GRCh37.p13', 66021);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (97, 'GL877871.1', null, 'GRCh37.p13', 389939);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (98, 'KB663603.1', null, 'GRCh37.p13', 599580);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (99, 'GL383521.1', null, 'GRCh37.p13', 143390);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (100, 'GL383522.1', null, 'GRCh37.p13', 123821);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (101, 'GL582966.2', null, 'GRCh37.p13', 96131);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (102, 'GL383523.1', null, 'GRCh37.p13', 171362);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (103, 'GL383524.1', null, 'GRCh37.p13', 78793);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (104, 'GL383525.1', null, 'GRCh37.p13', 65063);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (105, 'JH159131.1', null, 'GRCh37.p13', 393769);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (106, 'JH159132.1', null, 'GRCh37.p13', 100694);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (107, 'KE332495.1', null, 'GRCh37.p13', 263861);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (108, 'GL383526.1', null, 'GRCh37.p13', 180671);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (109, 'JH636055.1', null, 'GRCh37.p13', 173151);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (110, 'GL582967.1', null, 'GRCh37.p13', 248177);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (111, 'GL877872.1', null, 'GRCh37.p13', 297485);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (112, 'KE332496.1', null, 'GRCh37.p13', 503215);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (113, 'GL383527.1', null, 'GRCh37.p13', 164536);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (114, 'GL383528.1', null, 'GRCh37.p13', 376187);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (115, 'GL383529.1', null, 'GRCh37.p13', 121345);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (116, 'JH159133.1', null, 'GRCh37.p13', 266316);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (117, 'KE332497.1', null, 'GRCh37.p13', 543325);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (118, 'GL339449.2', null, 'GRCh37.p13', 1612928);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (119, 'GL383530.1', null, 'GRCh37.p13', 101241);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (120, 'GL383531.1', null, 'GRCh37.p13', 173459);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (121, 'GL383532.1', null, 'GRCh37.p13', 82728);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (122, 'GL949742.1', null, 'GRCh37.p13', 226852);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (123, 'JH636056.1', null, 'GRCh37.p13', 262912);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (124, 'JH636057.1', null, 'GRCh37.p13', 200195);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (125, 'JH806576.1', null, 'GRCh37.p13', 273386);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (126, 'KB663604.1', null, 'GRCh37.p13', 478993);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (127, 'KE332498.1', null, 'GRCh37.p13', 149443);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (128, 'GL383533.1', null, 'GRCh37.p13', 124736);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (129, 'KB021644.1', null, 'GRCh37.p13', 187824);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (130, 'GL582968.1', null, 'GRCh37.p13', 356330);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (131, 'GL582969.1', null, 'GRCh37.p13', 251823);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (132, 'GL582970.1', null, 'GRCh37.p13', 354970);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (133, 'GL582971.1', null, 'GRCh37.p13', 1284284);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (134, 'GL582972.1', null, 'GRCh37.p13', 327774);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (135, 'JH159134.2', null, 'GRCh37.p13', 3821770);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (136, 'JH636058.1', null, 'GRCh37.p13', 716227);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (137, 'KE332499.1', null, 'GRCh37.p13', 274521);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (138, 'GL383534.2', null, 'GRCh37.p13', 119183);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (139, 'GL383535.1', null, 'GRCh37.p13', 429806);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (140, 'GL383536.1', null, 'GRCh37.p13', 203777);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (141, 'GL949743.1', null, 'GRCh37.p13', 608579);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (142, 'JH159135.2', null, 'GRCh37.p13', 102251);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (143, 'KE332500.1', null, 'GRCh37.p13', 228602);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (144, 'GL339450.1', null, 'GRCh37.p13', 330164);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (145, 'GL383537.1', null, 'GRCh37.p13', 62435);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (146, 'GL383538.1', null, 'GRCh37.p13', 49281);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (147, 'JH636059.1', null, 'GRCh37.p13', 295379);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (148, 'JH806577.1', null, 'GRCh37.p13', 22394);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (149, 'JH806578.1', null, 'GRCh37.p13', 169437);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (150, 'JH806579.1', null, 'GRCh37.p13', 211307);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (151, 'KB663605.1', null, 'GRCh37.p13', 155926);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (152, 'GL383539.1', null, 'GRCh37.p13', 162988);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (153, 'GL383540.1', null, 'GRCh37.p13', 71551);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (154, 'GL383541.1', null, 'GRCh37.p13', 171286);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (155, 'GL383542.1', null, 'GRCh37.p13', 60032);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (156, 'GL383543.1', null, 'GRCh37.p13', 392792);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (157, 'GL383544.1', null, 'GRCh37.p13', 128378);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (158, 'GL877873.1', null, 'GRCh37.p13', 168465);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (159, 'JH591181.2', null, 'GRCh37.p13', 2281126);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (160, 'JH591182.1', null, 'GRCh37.p13', 196262);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (161, 'JH591183.1', null, 'GRCh37.p13', 177920);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (162, 'JH636060.1', null, 'GRCh37.p13', 437946);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (163, 'JH806580.1', null, 'GRCh37.p13', 93149);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (164, 'KB663606.1', null, 'GRCh37.p13', 305900);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (165, 'KE332501.1', null, 'GRCh37.p13', 1020827);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (166, 'GL383545.1', null, 'GRCh37.p13', 179254);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (167, 'GL383546.1', null, 'GRCh37.p13', 309802);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (168, 'GL582973.1', null, 'GRCh37.p13', 321004);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (169, 'GL949744.1', null, 'GRCh37.p13', 276448);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (170, 'JH159138.1', null, 'GRCh37.p13', 108875);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (171, 'JH159139.1', null, 'GRCh37.p13', 120441);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (172, 'JH159140.1', null, 'GRCh37.p13', 546435);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (173, 'JH159141.2', null, 'GRCh37.p13', 240775);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (174, 'JH159142.2', null, 'GRCh37.p13', 326647);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (175, 'JH159143.1', null, 'GRCh37.p13', 191402);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (176, 'JH591184.1', null, 'GRCh37.p13', 462282);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (177, 'JH591185.1', null, 'GRCh37.p13', 167437);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (178, 'JH720443.2', null, 'GRCh37.p13', 408430);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (179, 'JH806581.1', null, 'GRCh37.p13', 872115);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (180, 'GL383547.1', null, 'GRCh37.p13', 154407);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (181, 'JH159136.1', null, 'GRCh37.p13', 200998);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (182, 'JH159137.1', null, 'GRCh37.p13', 191409);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (183, 'GL383548.1', null, 'GRCh37.p13', 165247);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (184, 'GL582974.1', null, 'GRCh37.p13', 163298);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (185, 'JH720444.2', null, 'GRCh37.p13', 273128);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (186, 'KB663607.2', null, 'GRCh37.p13', 334922);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (187, 'GL383549.1', null, 'GRCh37.p13', 120804);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (188, 'GL383550.1', null, 'GRCh37.p13', 169178);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (189, 'GL383551.1', null, 'GRCh37.p13', 184319);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (190, 'GL383552.1', null, 'GRCh37.p13', 138655);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (191, 'GL383553.2', null, 'GRCh37.p13', 152874);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (192, 'GL877875.1', null, 'GRCh37.p13', 167313);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (193, 'GL877876.1', null, 'GRCh37.p13', 408271);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (194, 'GL949745.1', null, 'GRCh37.p13', 372609);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (195, 'GL582975.1', null, 'GRCh37.p13', 34662);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (196, 'KB021645.1', null, 'GRCh37.p13', 1523386);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (197, 'JH720445.1', null, 'GRCh37.p13', 170033);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (198, 'GL383554.1', null, 'GRCh37.p13', 296527);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (199, 'GL383555.1', null, 'GRCh37.p13', 388773);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (200, 'JH720446.1', null, 'GRCh37.p13', 97345);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (201, 'GL383556.1', null, 'GRCh37.p13', 192462);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (202, 'GL383557.1', null, 'GRCh37.p13', 89672);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (203, 'GL383558.1', null, 'GRCh37.p13', 457041);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (204, 'GL383559.2', null, 'GRCh37.p13', 338640);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (205, 'GL383560.1', null, 'GRCh37.p13', 534288);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (206, 'GL383561.2', null, 'GRCh37.p13', 644425);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (207, 'GL383562.1', null, 'GRCh37.p13', 45551);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (208, 'GL582976.1', null, 'GRCh37.p13', 412535);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (209, 'JH159144.1', null, 'GRCh37.p13', 388340);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (210, 'JH159145.1', null, 'GRCh37.p13', 194862);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (211, 'JH591186.1', null, 'GRCh37.p13', 376223);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (212, 'JH636061.1', null, 'GRCh37.p13', 186059);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (213, 'JH720447.1', null, 'GRCh37.p13', 454385);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (214, 'JH806582.2', null, 'GRCh37.p13', 342635);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (215, 'KB021646.2', null, 'GRCh37.p13', 211416);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (216, 'KE332502.1', null, 'GRCh37.p13', 341712);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (217, 'GL383563.2', null, 'GRCh37.p13', 270261);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (218, 'GL383564.1', null, 'GRCh37.p13', 133151);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (219, 'GL383565.1', null, 'GRCh37.p13', 223995);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (220, 'GL383566.1', null, 'GRCh37.p13', 90219);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (221, 'JH159146.1', null, 'GRCh37.p13', 278131);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (250, 'KE332506.1', null, 'GRCh37.p13', 307252);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (251, 'GL383578.1', null, 'GRCh37.p13', 63917);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (252, 'GL383579.1', null, 'GRCh37.p13', 201198);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (253, 'GL383580.1', null, 'GRCh37.p13', 74652);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (254, 'GL383581.1', null, 'GRCh37.p13', 116690);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (255, 'JH720449.1', null, 'GRCh37.p13', 212298);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (256, 'JH806583.1', null, 'GRCh37.p13', 167183);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (257, 'JH806584.1', null, 'GRCh37.p13', 70876);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (258, 'JH806585.1', null, 'GRCh37.p13', 73505);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (259, 'JH806586.1', null, 'GRCh37.p13', 43543);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (260, 'GL383582.2', null, 'GRCh37.p13', 162811);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (261, 'GL383583.1', null, 'GRCh37.p13', 96924);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (262, 'KB663609.1', null, 'GRCh37.p13', 74013);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (263, 'GL877877.2', null, 'GRCh37.p13', 284527);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (264, 'JH159150.3', null, 'GRCh37.p13', 3110903);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (265, 'JH720451.1', null, 'GRCh37.p13', 898979);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (266, 'JH720452.1', null, 'GRCh37.p13', 522319);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (267, 'JH720453.1', null, 'GRCh37.p13', 1461188);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (268, 'JH720454.3', null, 'GRCh37.p13', 752267);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (269, 'JH720455.1', null, 'GRCh37.p13', 65034);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (270, 'JH806587.1', null, 'GRCh37.p13', 4110759);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (271, 'JH806588.1', null, 'GRCh37.p13', 862483);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (272, 'JH806589.1', null, 'GRCh37.p13', 270630);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (273, 'JH806590.2', null, 'GRCh37.p13', 2418393);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (274, 'JH806591.1', null, 'GRCh37.p13', 882083);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (275, 'JH806592.1', null, 'GRCh37.p13', 835911);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (276, 'JH806593.1', null, 'GRCh37.p13', 389631);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (277, 'JH806594.1', null, 'GRCh37.p13', 390496);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (278, 'JH806595.1', null, 'GRCh37.p13', 444074);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (279, 'JH806596.1', null, 'GRCh37.p13', 413927);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (280, 'JH806597.1', null, 'GRCh37.p13', 1045622);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (281, 'JH806598.1', null, 'GRCh37.p13', 899320);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (282, 'JH806599.1', null, 'GRCh37.p13', 1214327);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (283, 'JH806600.2', null, 'GRCh37.p13', 6530008);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (284, 'JH806601.1', null, 'GRCh37.p13', 1389764);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (285, 'JH806602.1', null, 'GRCh37.p13', 713266);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (288, 'GL000250.1', 'chr6_apd_hap1', 'GRCh37.p13', 4622290);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (289, 'GL000251.1', 'chr6_cox_hap2', 'GRCh37.p13', 4795371);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (290, 'GL000252.1', 'chr6_dbb_hap3', 'GRCh37.p13', 4610396);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (291, 'GL000253.1', 'chr6_mann_hap4', 'GRCh37.p13', 4683263);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (292, 'GL000254.1', 'chr6_mcf_hap5', 'GRCh37.p13', 4833398);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (293, 'GL000255.1', 'chr6_qbl_hap6', 'GRCh37.p13', 4611984);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (294, 'GL000256.1', 'chr6_ssto_hap7', 'GRCh37.p13', 4928567);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (295, 'GL000257.1', 'chr4_ctg9_hap1', 'GRCh37.p13', 590426);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (296, 'GL000258.1', 'chr17_ctg5_hap1', 'GRCh37.p13', 1680828);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (297, 'J01415.2', 'chrM', 'GRCh37.p13', 16569);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (655, 'MU273358.1', 'chr7_MU273358v1_alt', 'GRCh38.p14', 464417);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (656, 'KV880766.1', 'chr8_KV880766v1_fix', 'GRCh38.p14', 156998);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (657, 'KV880767.1', 'chr8_KV880767v1_fix', 'GRCh38.p14', 265876);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (658, 'KZ208914.1', 'chr8_KZ208914v1_fix', 'GRCh38.p14', 165120);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (659, 'KZ208915.1', 'chr8_KZ208915v1_fix', 'GRCh38.p14', 6367528);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (660, 'MU273359.1', 'chr8_MU273359v1_fix', 'GRCh38.p14', 150302);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (661, 'MU273360.1', 'chr8_MU273360v1_fix', 'GRCh38.p14', 39290);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (662, 'MU273361.1', 'chr8_MU273361v1_fix', 'GRCh38.p14', 106905);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (663, 'MU273362.1', 'chr8_MU273362v1_fix', 'GRCh38.p14', 429744);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (664, 'MU273363.1', 'chr8_MU273363v1_fix', 'GRCh38.p14', 207371);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (665, 'KZ559107.1', 'chr8_KZ559107v1_alt', 'GRCh38.p14', 103072);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (666, 'KN196479.1', 'chr9_KN196479v1_fix', 'GRCh38.p14', 330164);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (667, 'ML143353.1', 'chr9_ML143353v1_fix', 'GRCh38.p14', 25408);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (668, 'MU273364.1', 'chr9_MU273364v1_fix', 'GRCh38.p14', 340717);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (669, 'MU273365.1', 'chr9_MU273365v1_fix', 'GRCh38.p14', 482250);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (670, 'MU273366.1', 'chr9_MU273366v1_fix', 'GRCh38.p14', 569668);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (671, 'KQ090018.1', 'chr9_KQ090018v1_alt', 'GRCh38.p14', 163882);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (672, 'KQ090019.1', 'chr9_KQ090019v1_alt', 'GRCh38.p14', 134099);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (673, 'KN196480.1', 'chr10_KN196480v1_fix', 'GRCh38.p14', 277797);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (674, 'KN538365.1', 'chr10_KN538365v1_fix', 'GRCh38.p14', 14347);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (675, 'KN538366.1', 'chr10_KN538366v1_fix', 'GRCh38.p14', 85284);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (676, 'KN538367.1', 'chr10_KN538367v1_fix', 'GRCh38.p14', 420164);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (677, 'KQ090021.1', 'chr10_KQ090021v1_fix', 'GRCh38.p14', 264545);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (678, 'ML143354.1', 'chr10_ML143354v1_fix', 'GRCh38.p14', 454963);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (679, 'ML143355.1', 'chr10_ML143355v1_fix', 'GRCh38.p14', 292944);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (680, 'MU273367.1', 'chr10_MU273367v1_fix', 'GRCh38.p14', 196262);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (681, 'KQ090020.1', 'chr10_KQ090020v1_alt', 'GRCh38.p14', 185507);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (682, 'KN196481.1', 'chr11_KN196481v1_fix', 'GRCh38.p14', 108875);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (683, 'KQ090022.1', 'chr11_KQ090022v1_fix', 'GRCh38.p14', 181958);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (684, 'KQ759759.2', 'chr11_KQ759759v2_fix', 'GRCh38.p14', 204999);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (685, 'KV766195.1', 'chr11_KV766195v1_fix', 'GRCh38.p14', 140877);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (686, 'KZ559108.1', 'chr11_KZ559108v1_fix', 'GRCh38.p14', 305244);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (687, 'KZ559109.1', 'chr11_KZ559109v1_fix', 'GRCh38.p14', 279644);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (688, 'ML143356.1', 'chr11_ML143356v1_fix', 'GRCh38.p14', 45257);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (689, 'ML143357.1', 'chr11_ML143357v1_fix', 'GRCh38.p14', 165419);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (690, 'ML143358.1', 'chr11_ML143358v1_fix', 'GRCh38.p14', 270122);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (691, 'ML143359.1', 'chr11_ML143359v1_fix', 'GRCh38.p14', 217075);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (692, 'ML143360.1', 'chr11_ML143360v1_fix', 'GRCh38.p14', 170928);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (693, 'MU273369.1', 'chr11_MU273369v1_fix', 'GRCh38.p14', 434831);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (694, 'MU273370.1', 'chr11_MU273370v1_fix', 'GRCh38.p14', 344606);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (695, 'MU273371.1', 'chr11_MU273371v1_fix', 'GRCh38.p14', 122722);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (696, 'KN538368.1', 'chr11_KN538368v1_alt', 'GRCh38.p14', 203552);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (697, 'KZ559110.1', 'chr11_KZ559110v1_alt', 'GRCh38.p14', 301637);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (698, 'KZ559111.1', 'chr11_KZ559111v1_alt', 'GRCh38.p14', 181167);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (699, 'MU273368.1', 'chr11_MU273368v1_alt', 'GRCh38.p14', 261194);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (700, 'KN196482.1', 'chr12_KN196482v1_fix', 'GRCh38.p14', 211377);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (701, 'KN538369.1', 'chr12_KN538369v1_fix', 'GRCh38.p14', 541038);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (702, 'KN538370.1', 'chr12_KN538370v1_fix', 'GRCh38.p14', 86533);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (703, 'KQ759760.1', 'chr12_KQ759760v1_fix', 'GRCh38.p14', 315610);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (704, 'KZ208916.1', 'chr12_KZ208916v1_fix', 'GRCh38.p14', 1046838);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (705, 'KZ208917.1', 'chr12_KZ208917v1_fix', 'GRCh38.p14', 64689);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (706, 'ML143361.1', 'chr12_ML143361v1_fix', 'GRCh38.p14', 297568);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (707, 'ML143362.1', 'chr12_ML143362v1_fix', 'GRCh38.p14', 192531);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (708, 'MU273372.1', 'chr12_MU273372v1_fix', 'GRCh38.p14', 104537);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (709, 'KQ090023.1', 'chr12_KQ090023v1_alt', 'GRCh38.p14', 109323);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (710, 'KZ208918.1', 'chr12_KZ208918v1_alt', 'GRCh38.p14', 174808);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (711, 'KZ559112.1', 'chr12_KZ559112v1_alt', 'GRCh38.p14', 154139);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (712, 'KN196483.1', 'chr13_KN196483v1_fix', 'GRCh38.p14', 35455);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (713, 'KN538371.1', 'chr13_KN538371v1_fix', 'GRCh38.p14', 206320);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (714, 'KN538372.1', 'chr13_KN538372v1_fix', 'GRCh38.p14', 356766);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (715, 'KN538373.1', 'chr13_KN538373v1_fix', 'GRCh38.p14', 148762);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (716, 'ML143363.1', 'chr13_ML143363v1_fix', 'GRCh38.p14', 7309);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (717, 'ML143364.1', 'chr13_ML143364v1_fix', 'GRCh38.p14', 158944);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (718, 'ML143365.1', 'chr13_ML143365v1_fix', 'GRCh38.p14', 65394);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (719, 'ML143366.1', 'chr13_ML143366v1_fix', 'GRCh38.p14', 409912);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (720, 'KQ090024.1', 'chr13_KQ090024v1_alt', 'GRCh38.p14', 168146);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (721, 'KQ090025.1', 'chr13_KQ090025v1_alt', 'GRCh38.p14', 123480);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (722, 'KZ208920.1', 'chr14_KZ208920v1_fix', 'GRCh38.p14', 690932);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (723, 'ML143367.1', 'chr14_ML143367v1_fix', 'GRCh38.p14', 399183);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (724, 'MU273373.1', 'chr14_MU273373v1_fix', 'GRCh38.p14', 722645);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (725, 'KZ208919.1', 'chr14_KZ208919v1_alt', 'GRCh38.p14', 171798);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (726, 'ML143368.1', 'chr14_ML143368v1_alt', 'GRCh38.p14', 264228);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (727, 'KN538374.1', 'chr15_KN538374v1_fix', 'GRCh38.p14', 4998962);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (728, 'ML143369.1', 'chr15_ML143369v1_fix', 'GRCh38.p14', 97763);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (729, 'ML143370.1', 'chr15_ML143370v1_fix', 'GRCh38.p14', 369264);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (730, 'ML143371.1', 'chr15_ML143371v1_fix', 'GRCh38.p14', 5500449);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (731, 'ML143372.1', 'chr15_ML143372v1_fix', 'GRCh38.p14', 396515);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (732, 'MU273374.1', 'chr15_MU273374v1_fix', 'GRCh38.p14', 1154574);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (733, 'KQ031389.1', 'chr15_KQ031389v1_alt', 'GRCh38.p14', 2365364);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (734, 'MU273375.1', 'chr15_MU273375v1_alt', 'GRCh38.p14', 204007);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (735, 'KV880768.1', 'chr16_KV880768v1_fix', 'GRCh38.p14', 1927115);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (736, 'KZ559113.1', 'chr16_KZ559113v1_fix', 'GRCh38.p14', 480415);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (737, 'ML143373.1', 'chr16_ML143373v1_fix', 'GRCh38.p14', 270967);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (738, 'MU273376.1', 'chr16_MU273376v1_fix', 'GRCh38.p14', 87715);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (739, 'MU273377.1', 'chr16_MU273377v1_fix', 'GRCh38.p14', 334997);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (740, 'KQ031390.1', 'chr16_KQ031390v1_alt', 'GRCh38.p14', 169136);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (741, 'KQ090026.1', 'chr16_KQ090026v1_alt', 'GRCh38.p14', 59016);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (742, 'KQ090027.1', 'chr16_KQ090027v1_alt', 'GRCh38.p14', 267463);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (743, 'KZ208921.1', 'chr16_KZ208921v1_alt', 'GRCh38.p14', 78609);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (744, 'KV575245.1', 'chr17_KV575245v1_fix', 'GRCh38.p14', 154723);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (745, 'KV766196.1', 'chr17_KV766196v1_fix', 'GRCh38.p14', 281919);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (746, 'ML143374.1', 'chr17_ML143374v1_fix', 'GRCh38.p14', 137908);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (747, 'ML143375.1', 'chr17_ML143375v1_fix', 'GRCh38.p14', 56695);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (748, 'MU273379.1', 'chr17_MU273379v1_fix', 'GRCh38.p14', 234878);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (749, 'MU273380.1', 'chr17_MU273380v1_fix', 'GRCh38.p14', 538541);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (750, 'MU273381.1', 'chr17_MU273381v1_fix', 'GRCh38.p14', 144689);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (751, 'MU273382.1', 'chr17_MU273382v1_fix', 'GRCh38.p14', 187626);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (752, 'MU273383.1', 'chr17_MU273383v1_fix', 'GRCh38.p14', 172609);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (753, 'KV766197.1', 'chr17_KV766197v1_alt', 'GRCh38.p14', 246895);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (754, 'KV766198.1', 'chr17_KV766198v1_alt', 'GRCh38.p14', 276292);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (755, 'KZ559114.1', 'chr17_KZ559114v1_alt', 'GRCh38.p14', 116753);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (756, 'MU273378.1', 'chr17_MU273378v1_alt', 'GRCh38.p14', 372839);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (757, 'KQ090028.1', 'chr18_KQ090028v1_fix', 'GRCh38.p14', 407387);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (758, 'KZ208922.1', 'chr18_KZ208922v1_fix', 'GRCh38.p14', 93070);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (759, 'KZ559115.1', 'chr18_KZ559115v1_fix', 'GRCh38.p14', 230843);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (760, 'KQ458385.1', 'chr18_KQ458385v1_alt', 'GRCh38.p14', 205101);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (761, 'KZ559116.1', 'chr18_KZ559116v1_alt', 'GRCh38.p14', 163186);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (762, 'KN196484.1', 'chr19_KN196484v1_fix', 'GRCh38.p14', 370917);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (763, 'KQ458386.1', 'chr19_KQ458386v1_fix', 'GRCh38.p14', 405389);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (764, 'ML143376.1', 'chr19_ML143376v1_fix', 'GRCh38.p14', 493165);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (765, 'MU273384.1', 'chr19_MU273384v1_fix', 'GRCh38.p14', 333754);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (766, 'MU273385.1', 'chr19_MU273385v1_fix', 'GRCh38.p14', 137818);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (767, 'MU273386.1', 'chr19_MU273386v1_fix', 'GRCh38.p14', 226166);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (768, 'KV575246.1', 'chr19_KV575246v1_alt', 'GRCh38.p14', 163926);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (769, 'KV575247.1', 'chr19_KV575247v1_alt', 'GRCh38.p14', 170206);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (815, 'KV575248.1', 'chr19_KV575248v1_alt', 'GRCh38.p14', 168131);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (816, 'KV575249.1', 'chr19_KV575249v1_alt', 'GRCh38.p14', 293522);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (817, 'KV575250.1', 'chr19_KV575250v1_alt', 'GRCh38.p14', 241058);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (818, 'KV575251.1', 'chr19_KV575251v1_alt', 'GRCh38.p14', 159285);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (819, 'KV575252.1', 'chr19_KV575252v1_alt', 'GRCh38.p14', 178197);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (820, 'KV575253.1', 'chr19_KV575253v1_alt', 'GRCh38.p14', 166713);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (821, 'KV575254.1', 'chr19_KV575254v1_alt', 'GRCh38.p14', 99845);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (822, 'KV575255.1', 'chr19_KV575255v1_alt', 'GRCh38.p14', 161095);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (823, 'KV575256.1', 'chr19_KV575256v1_alt', 'GRCh38.p14', 223118);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (824, 'KV575257.1', 'chr19_KV575257v1_alt', 'GRCh38.p14', 100553);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (825, 'KV575258.1', 'chr19_KV575258v1_alt', 'GRCh38.p14', 156965);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (826, 'KV575259.1', 'chr19_KV575259v1_alt', 'GRCh38.p14', 171263);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (827, 'KV575260.1', 'chr19_KV575260v1_alt', 'GRCh38.p14', 145691);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (828, 'MU273387.1', 'chr19_MU273387v1_alt', 'GRCh38.p14', 89211);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (829, 'MU273388.1', 'chr20_MU273388v1_fix', 'GRCh38.p14', 273725);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (830, 'MU273389.1', 'chr20_MU273389v1_fix', 'GRCh38.p14', 355731);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (831, 'ML143377.1', 'chr21_ML143377v1_fix', 'GRCh38.p14', 519485);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (832, 'MU273390.1', 'chr21_MU273390v1_fix', 'GRCh38.p14', 336752);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (833, 'MU273391.1', 'chr21_MU273391v1_fix', 'GRCh38.p14', 1020778);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (834, 'MU273392.1', 'chr21_MU273392v1_fix', 'GRCh38.p14', 189707);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (835, 'KQ759762.2', 'chr22_KQ759762v2_fix', 'GRCh38.p14', 101040);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (836, 'ML143378.1', 'chr22_ML143378v1_fix', 'GRCh38.p14', 461303);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (837, 'ML143379.1', 'chr22_ML143379v1_fix', 'GRCh38.p14', 12295);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (838, 'ML143380.1', 'chr22_ML143380v1_fix', 'GRCh38.p14', 412368);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (839, 'KN196485.1', 'chr22_KN196485v1_alt', 'GRCh38.p14', 156562);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (840, 'KN196486.1', 'chr22_KN196486v1_alt', 'GRCh38.p14', 153027);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (841, 'KQ458387.1', 'chr22_KQ458387v1_alt', 'GRCh38.p14', 155930);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (842, 'KQ458388.1', 'chr22_KQ458388v1_alt', 'GRCh38.p14', 174749);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (843, 'KQ759761.1', 'chr22_KQ759761v1_alt', 'GRCh38.p14', 145162);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (844, 'ML143381.1', 'chrX_ML143381v1_fix', 'GRCh38.p14', 403128);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (845, 'ML143382.1', 'chrX_ML143382v1_fix', 'GRCh38.p14', 28824);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (846, 'ML143383.1', 'chrX_ML143383v1_fix', 'GRCh38.p14', 68192);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (847, 'ML143384.1', 'chrX_ML143384v1_fix', 'GRCh38.p14', 14678);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (848, 'ML143385.1', 'chrX_ML143385v1_fix', 'GRCh38.p14', 17435);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (849, 'MU273393.1', 'chrX_MU273393v1_fix', 'GRCh38.p14', 68810);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (850, 'MU273394.1', 'chrX_MU273394v1_fix', 'GRCh38.p14', 140567);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (851, 'KV766199.1', 'chrX_KV766199v1_alt', 'GRCh38.p14', 188004);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (852, 'MU273395.1', 'chrX_MU273395v1_alt', 'GRCh38.p14', 619716);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (853, 'MU273396.1', 'chrX_MU273396v1_alt', 'GRCh38.p14', 294119);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (854, 'MU273397.1', 'chrX_MU273397v1_alt', 'GRCh38.p14', 330493);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (855, 'KN196487.1', 'chrY_KN196487v1_fix', 'GRCh38.p14', 101150);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (856, 'KZ208923.1', 'chrY_KZ208923v1_fix', 'GRCh38.p14', 48370);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (857, 'KZ208924.1', 'chrY_KZ208924v1_fix', 'GRCh38.p14', 209722);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (858, 'MU273398.1', 'chrY_MU273398v1_fix', 'GRCh38.p14', 865743);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (861, 'GL383520.2', 'chr1_GL383520v2_alt', 'GRCh38.p14', 366580);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (862, 'KI270759.1', 'chr1_KI270759v1_alt', 'GRCh38.p14', 425601);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (863, 'KI270760.1', 'chr1_KI270760v1_alt', 'GRCh38.p14', 109528);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (864, 'KI270761.1', 'chr1_KI270761v1_alt', 'GRCh38.p14', 165834);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (865, 'KI270762.1', 'chr1_KI270762v1_alt', 'GRCh38.p14', 354444);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (866, 'KI270763.1', 'chr1_KI270763v1_alt', 'GRCh38.p14', 911658);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (867, 'KI270764.1', 'chr1_KI270764v1_alt', 'GRCh38.p14', 50258);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (868, 'KI270765.1', 'chr1_KI270765v1_alt', 'GRCh38.p14', 185285);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (869, 'KI270766.1', 'chr1_KI270766v1_alt', 'GRCh38.p14', 256271);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (873, 'KI270767.1', 'chr2_KI270767v1_alt', 'GRCh38.p14', 161578);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (874, 'KI270768.1', 'chr2_KI270768v1_alt', 'GRCh38.p14', 110099);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (875, 'KI270769.1', 'chr2_KI270769v1_alt', 'GRCh38.p14', 120616);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (876, 'KI270770.1', 'chr2_KI270770v1_alt', 'GRCh38.p14', 136240);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (877, 'KI270771.1', 'chr2_KI270771v1_alt', 'GRCh38.p14', 110395);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (878, 'KI270772.1', 'chr2_KI270772v1_alt', 'GRCh38.p14', 133041);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (879, 'KI270773.1', 'chr2_KI270773v1_alt', 'GRCh38.p14', 70887);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (880, 'KI270774.1', 'chr2_KI270774v1_alt', 'GRCh38.p14', 223625);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (881, 'KI270775.1', 'chr2_KI270775v1_alt', 'GRCh38.p14', 138019);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (882, 'KI270776.1', 'chr2_KI270776v1_alt', 'GRCh38.p14', 174166);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (884, 'JH636055.2', 'chr3_JH636055v2_alt', 'GRCh38.p14', 173151);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (885, 'KI270777.1', 'chr3_KI270777v1_alt', 'GRCh38.p14', 173649);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (886, 'KI270778.1', 'chr3_KI270778v1_alt', 'GRCh38.p14', 248252);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (887, 'KI270779.1', 'chr3_KI270779v1_alt', 'GRCh38.p14', 205312);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (888, 'KI270780.1', 'chr3_KI270780v1_alt', 'GRCh38.p14', 224108);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (889, 'KI270781.1', 'chr3_KI270781v1_alt', 'GRCh38.p14', 113034);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (890, 'KI270782.1', 'chr3_KI270782v1_alt', 'GRCh38.p14', 162429);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (891, 'KI270783.1', 'chr3_KI270783v1_alt', 'GRCh38.p14', 109187);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (892, 'KI270784.1', 'chr3_KI270784v1_alt', 'GRCh38.p14', 184404);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (893, 'GL000257.2', 'chr4_GL000257v2_alt', 'GRCh38.p14', 586476);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (896, 'KI270785.1', 'chr4_KI270785v1_alt', 'GRCh38.p14', 119912);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (897, 'KI270786.1', 'chr4_KI270786v1_alt', 'GRCh38.p14', 244096);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (898, 'KI270787.1', 'chr4_KI270787v1_alt', 'GRCh38.p14', 111943);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (899, 'KI270788.1', 'chr4_KI270788v1_alt', 'GRCh38.p14', 158965);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (900, 'KI270789.1', 'chr4_KI270789v1_alt', 'GRCh38.p14', 205944);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (901, 'KI270790.1', 'chr4_KI270790v1_alt', 'GRCh38.p14', 220246);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (907, 'KI270791.1', 'chr5_KI270791v1_alt', 'GRCh38.p14', 195710);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (908, 'KI270792.1', 'chr5_KI270792v1_alt', 'GRCh38.p14', 179043);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (909, 'KI270793.1', 'chr5_KI270793v1_alt', 'GRCh38.p14', 126136);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (910, 'KI270794.1', 'chr5_KI270794v1_alt', 'GRCh38.p14', 164558);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (911, 'KI270795.1', 'chr5_KI270795v1_alt', 'GRCh38.p14', 131892);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (912, 'KI270796.1', 'chr5_KI270796v1_alt', 'GRCh38.p14', 172708);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (913, 'GL000250.2', 'chr6_GL000250v2_alt', 'GRCh38.p14', 4672374);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (923, 'KB021644.2', 'chr6_KB021644v2_alt', 'GRCh38.p14', 185823);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (924, 'KI270797.1', 'chr6_KI270797v1_alt', 'GRCh38.p14', 197536);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (925, 'KI270798.1', 'chr6_KI270798v1_alt', 'GRCh38.p14', 271782);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (926, 'KI270799.1', 'chr6_KI270799v1_alt', 'GRCh38.p14', 152148);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (927, 'KI270800.1', 'chr6_KI270800v1_alt', 'GRCh38.p14', 175808);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (928, 'KI270801.1', 'chr6_KI270801v1_alt', 'GRCh38.p14', 870480);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (929, 'KI270802.1', 'chr6_KI270802v1_alt', 'GRCh38.p14', 75005);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (931, 'KI270803.1', 'chr7_KI270803v1_alt', 'GRCh38.p14', 1111570);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (932, 'KI270804.1', 'chr7_KI270804v1_alt', 'GRCh38.p14', 157952);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (933, 'KI270805.1', 'chr7_KI270805v1_alt', 'GRCh38.p14', 209988);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (934, 'KI270806.1', 'chr7_KI270806v1_alt', 'GRCh38.p14', 158166);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (935, 'KI270807.1', 'chr7_KI270807v1_alt', 'GRCh38.p14', 126434);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (936, 'KI270808.1', 'chr7_KI270808v1_alt', 'GRCh38.p14', 271455);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (937, 'KI270809.1', 'chr7_KI270809v1_alt', 'GRCh38.p14', 209586);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (938, 'KI270810.1', 'chr8_KI270810v1_alt', 'GRCh38.p14', 374415);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (939, 'KI270811.1', 'chr8_KI270811v1_alt', 'GRCh38.p14', 292436);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (940, 'KI270812.1', 'chr8_KI270812v1_alt', 'GRCh38.p14', 282736);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (941, 'KI270813.1', 'chr8_KI270813v1_alt', 'GRCh38.p14', 300230);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (942, 'KI270814.1', 'chr8_KI270814v1_alt', 'GRCh38.p14', 141812);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (943, 'KI270815.1', 'chr8_KI270815v1_alt', 'GRCh38.p14', 132244);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (944, 'KI270816.1', 'chr8_KI270816v1_alt', 'GRCh38.p14', 305841);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (945, 'KI270817.1', 'chr8_KI270817v1_alt', 'GRCh38.p14', 158983);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (946, 'KI270818.1', 'chr8_KI270818v1_alt', 'GRCh38.p14', 145606);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (947, 'KI270819.1', 'chr8_KI270819v1_alt', 'GRCh38.p14', 133535);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (948, 'KI270820.1', 'chr8_KI270820v1_alt', 'GRCh38.p14', 36640);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (949, 'KI270821.1', 'chr8_KI270821v1_alt', 'GRCh38.p14', 985506);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (950, 'KI270822.1', 'chr8_KI270822v1_alt', 'GRCh38.p14', 624492);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (955, 'KI270823.1', 'chr9_KI270823v1_alt', 'GRCh38.p14', 439082);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (958, 'KI270824.1', 'chr10_KI270824v1_alt', 'GRCh38.p14', 181496);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (962, 'KI270826.1', 'chr11_KI270826v1_alt', 'GRCh38.p14', 186169);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (963, 'KI270827.1', 'chr11_KI270827v1_alt', 'GRCh38.p14', 67707);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (964, 'KI270829.1', 'chr11_KI270829v1_alt', 'GRCh38.p14', 204059);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (965, 'KI270830.1', 'chr11_KI270830v1_alt', 'GRCh38.p14', 177092);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (966, 'KI270831.1', 'chr11_KI270831v1_alt', 'GRCh38.p14', 296895);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (967, 'KI270832.1', 'chr11_KI270832v1_alt', 'GRCh38.p14', 210133);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (969, 'GL383550.2', 'chr12_GL383550v2_alt', 'GRCh38.p14', 169178);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (975, 'KI270833.1', 'chr12_KI270833v1_alt', 'GRCh38.p14', 76061);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (976, 'KI270834.1', 'chr12_KI270834v1_alt', 'GRCh38.p14', 119498);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (977, 'KI270835.1', 'chr12_KI270835v1_alt', 'GRCh38.p14', 238139);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (978, 'KI270836.1', 'chr12_KI270836v1_alt', 'GRCh38.p14', 56134);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (979, 'KI270837.1', 'chr12_KI270837v1_alt', 'GRCh38.p14', 40090);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (980, 'KI270838.1', 'chr13_KI270838v1_alt', 'GRCh38.p14', 306913);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (981, 'KI270839.1', 'chr13_KI270839v1_alt', 'GRCh38.p14', 180306);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (982, 'KI270840.1', 'chr13_KI270840v1_alt', 'GRCh38.p14', 191684);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (983, 'KI270841.1', 'chr13_KI270841v1_alt', 'GRCh38.p14', 169134);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (984, 'KI270842.1', 'chr13_KI270842v1_alt', 'GRCh38.p14', 37287);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (985, 'KI270843.1', 'chr13_KI270843v1_alt', 'GRCh38.p14', 103832);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (986, 'KI270844.1', 'chr14_KI270844v1_alt', 'GRCh38.p14', 322166);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (987, 'KI270845.1', 'chr14_KI270845v1_alt', 'GRCh38.p14', 180703);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (988, 'KI270846.1', 'chr14_KI270846v1_alt', 'GRCh38.p14', 1351393);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (989, 'KI270847.1', 'chr14_KI270847v1_alt', 'GRCh38.p14', 1511111);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (991, 'GL383555.2', 'chr15_GL383555v2_alt', 'GRCh38.p14', 388773);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (992, 'KI270848.1', 'chr15_KI270848v1_alt', 'GRCh38.p14', 327382);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (993, 'KI270849.1', 'chr15_KI270849v1_alt', 'GRCh38.p14', 244917);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (994, 'KI270850.1', 'chr15_KI270850v1_alt', 'GRCh38.p14', 430880);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (995, 'KI270851.1', 'chr15_KI270851v1_alt', 'GRCh38.p14', 263054);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (996, 'KI270852.1', 'chr15_KI270852v1_alt', 'GRCh38.p14', 478999);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (999, 'KI270853.1', 'chr16_KI270853v1_alt', 'GRCh38.p14', 2659700);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1000, 'KI270854.1', 'chr16_KI270854v1_alt', 'GRCh38.p14', 134193);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1001, 'KI270855.1', 'chr16_KI270855v1_alt', 'GRCh38.p14', 232857);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1002, 'KI270856.1', 'chr16_KI270856v1_alt', 'GRCh38.p14', 63982);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1003, 'GL000258.2', 'chr17_GL000258v2_alt', 'GRCh38.p14', 1821992);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1004, 'GL383563.3', 'chr17_GL383563v3_alt', 'GRCh38.p14', 375691);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1005, 'GL383564.2', 'chr17_GL383564v2_alt', 'GRCh38.p14', 133151);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1010, 'KI270857.1', 'chr17_KI270857v1_alt', 'GRCh38.p14', 2877074);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1011, 'KI270858.1', 'chr17_KI270858v1_alt', 'GRCh38.p14', 235827);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1012, 'KI270859.1', 'chr17_KI270859v1_alt', 'GRCh38.p14', 108763);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1013, 'KI270860.1', 'chr17_KI270860v1_alt', 'GRCh38.p14', 178921);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1014, 'KI270861.1', 'chr17_KI270861v1_alt', 'GRCh38.p14', 196688);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1015, 'KI270862.1', 'chr17_KI270862v1_alt', 'GRCh38.p14', 391357);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1022, 'KI270863.1', 'chr18_KI270863v1_alt', 'GRCh38.p14', 167999);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1025, 'KI270864.1', 'chr18_KI270864v1_alt', 'GRCh38.p14', 111737);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1031, 'KI270865.1', 'chr19_KI270865v1_alt', 'GRCh38.p14', 52969);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1032, 'KI270866.1', 'chr19_KI270866v1_alt', 'GRCh38.p14', 43156);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1033, 'KI270867.1', 'chr19_KI270867v1_alt', 'GRCh38.p14', 233762);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1034, 'KI270868.1', 'chr19_KI270868v1_alt', 'GRCh38.p14', 61734);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1035, 'GL383577.2', 'chr20_GL383577v2_alt', 'GRCh38.p14', 128386);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1036, 'KI270869.1', 'chr20_KI270869v1_alt', 'GRCh38.p14', 118774);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1037, 'KI270870.1', 'chr20_KI270870v1_alt', 'GRCh38.p14', 183433);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1038, 'KI270871.1', 'chr20_KI270871v1_alt', 'GRCh38.p14', 58661);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1039, 'GL383578.2', 'chr21_GL383578v2_alt', 'GRCh38.p14', 63917);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1040, 'GL383579.2', 'chr21_GL383579v2_alt', 'GRCh38.p14', 201197);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1041, 'GL383580.2', 'chr21_GL383580v2_alt', 'GRCh38.p14', 74653);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1042, 'GL383581.2', 'chr21_GL383581v2_alt', 'GRCh38.p14', 116689);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1043, 'KI270872.1', 'chr21_KI270872v1_alt', 'GRCh38.p14', 82692);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1044, 'KI270873.1', 'chr21_KI270873v1_alt', 'GRCh38.p14', 143900);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1045, 'KI270874.1', 'chr21_KI270874v1_alt', 'GRCh38.p14', 166743);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1047, 'GL383583.2', 'chr22_GL383583v2_alt', 'GRCh38.p14', 96924);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1048, 'KI270875.1', 'chr22_KI270875v1_alt', 'GRCh38.p14', 259914);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1049, 'KI270876.1', 'chr22_KI270876v1_alt', 'GRCh38.p14', 263666);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1050, 'KI270877.1', 'chr22_KI270877v1_alt', 'GRCh38.p14', 101331);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1051, 'KI270878.1', 'chr22_KI270878v1_alt', 'GRCh38.p14', 186262);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1052, 'KI270879.1', 'chr22_KI270879v1_alt', 'GRCh38.p14', 304135);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1053, 'KI270880.1', 'chrX_KI270880v1_alt', 'GRCh38.p14', 284869);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1054, 'KI270881.1', 'chrX_KI270881v1_alt', 'GRCh38.p14', 144206);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1055, 'KI270892.1', 'chr1_KI270892v1_alt', 'GRCh38.p14', 162212);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1056, 'KI270893.1', 'chr2_KI270893v1_alt', 'GRCh38.p14', 161218);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1057, 'KI270894.1', 'chr2_KI270894v1_alt', 'GRCh38.p14', 214158);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1058, 'KI270895.1', 'chr3_KI270895v1_alt', 'GRCh38.p14', 162896);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1059, 'KI270896.1', 'chr4_KI270896v1_alt', 'GRCh38.p14', 378547);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1060, 'KI270897.1', 'chr5_KI270897v1_alt', 'GRCh38.p14', 1144418);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1061, 'KI270898.1', 'chr5_KI270898v1_alt', 'GRCh38.p14', 130957);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1062, 'GL000251.2', 'chr6_GL000251v2_alt', 'GRCh38.p14', 4795265);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1063, 'KI270899.1', 'chr7_KI270899v1_alt', 'GRCh38.p14', 190869);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1064, 'KI270900.1', 'chr8_KI270900v1_alt', 'GRCh38.p14', 318687);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1065, 'KI270901.1', 'chr8_KI270901v1_alt', 'GRCh38.p14', 136959);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1066, 'KI270902.1', 'chr11_KI270902v1_alt', 'GRCh38.p14', 106711);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1067, 'KI270903.1', 'chr11_KI270903v1_alt', 'GRCh38.p14', 214625);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1068, 'KI270904.1', 'chr12_KI270904v1_alt', 'GRCh38.p14', 572349);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1069, 'KI270905.1', 'chr15_KI270905v1_alt', 'GRCh38.p14', 5161414);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1070, 'KI270906.1', 'chr15_KI270906v1_alt', 'GRCh38.p14', 196384);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1072, 'KI270907.1', 'chr17_KI270907v1_alt', 'GRCh38.p14', 137721);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1073, 'KI270908.1', 'chr17_KI270908v1_alt', 'GRCh38.p14', 1423190);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1074, 'KI270909.1', 'chr17_KI270909v1_alt', 'GRCh38.p14', 325800);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1075, 'KI270910.1', 'chr17_KI270910v1_alt', 'GRCh38.p14', 157099);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1076, 'KI270911.1', 'chr18_KI270911v1_alt', 'GRCh38.p14', 157710);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1077, 'KI270912.1', 'chr18_KI270912v1_alt', 'GRCh38.p14', 174061);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1078, 'GL949747.2', 'chr19_GL949747v2_alt', 'GRCh38.p14', 729520);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1080, 'KI270913.1', 'chrX_KI270913v1_alt', 'GRCh38.p14', 274009);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1081, 'KI270924.1', 'chr3_KI270924v1_alt', 'GRCh38.p14', 166540);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1082, 'KI270925.1', 'chr4_KI270925v1_alt', 'GRCh38.p14', 555799);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1083, 'GL000252.2', 'chr6_GL000252v2_alt', 'GRCh38.p14', 4604811);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1084, 'KI270926.1', 'chr8_KI270926v1_alt', 'GRCh38.p14', 229282);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1085, 'KI270927.1', 'chr11_KI270927v1_alt', 'GRCh38.p14', 218612);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1086, 'GL949748.2', 'chr19_GL949748v2_alt', 'GRCh38.p14', 1064304);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1087, 'KI270928.1', 'chr22_KI270928v1_alt', 'GRCh38.p14', 176103);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1088, 'KI270934.1', 'chr3_KI270934v1_alt', 'GRCh38.p14', 163458);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1089, 'GL000253.2', 'chr6_GL000253v2_alt', 'GRCh38.p14', 4677643);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1090, 'GL949749.2', 'chr19_GL949749v2_alt', 'GRCh38.p14', 1091841);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1091, 'KI270935.1', 'chr3_KI270935v1_alt', 'GRCh38.p14', 197351);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1092, 'GL000254.2', 'chr6_GL000254v2_alt', 'GRCh38.p14', 4827813);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1093, 'GL949750.2', 'chr19_GL949750v2_alt', 'GRCh38.p14', 1066390);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1094, 'KI270936.1', 'chr3_KI270936v1_alt', 'GRCh38.p14', 164170);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1095, 'GL000255.2', 'chr6_GL000255v2_alt', 'GRCh38.p14', 4606388);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1096, 'GL949751.2', 'chr19_GL949751v2_alt', 'GRCh38.p14', 1002683);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1097, 'KI270937.1', 'chr3_KI270937v1_alt', 'GRCh38.p14', 165607);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1098, 'GL000256.2', 'chr6_GL000256v2_alt', 'GRCh38.p14', 4929269);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1100, 'KI270758.1', 'chr6_KI270758v1_alt', 'GRCh38.p14', 76752);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1101, 'GL949753.2', 'chr19_GL949753v2_alt', 'GRCh38.p14', 796479);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1102, 'KI270938.1', 'chr19_KI270938v1_alt', 'GRCh38.p14', 1066800);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1103, 'KI270882.1', 'chr19_KI270882v1_alt', 'GRCh38.p14', 248807);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1104, 'KI270883.1', 'chr19_KI270883v1_alt', 'GRCh38.p14', 170399);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1105, 'KI270884.1', 'chr19_KI270884v1_alt', 'GRCh38.p14', 157053);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1106, 'KI270885.1', 'chr19_KI270885v1_alt', 'GRCh38.p14', 171027);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1107, 'KI270886.1', 'chr19_KI270886v1_alt', 'GRCh38.p14', 204239);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1108, 'KI270887.1', 'chr19_KI270887v1_alt', 'GRCh38.p14', 209512);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1109, 'KI270888.1', 'chr19_KI270888v1_alt', 'GRCh38.p14', 155532);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1110, 'KI270889.1', 'chr19_KI270889v1_alt', 'GRCh38.p14', 170698);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1111, 'KI270890.1', 'chr19_KI270890v1_alt', 'GRCh38.p14', 184499);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1112, 'KI270891.1', 'chr19_KI270891v1_alt', 'GRCh38.p14', 170680);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1113, 'KI270914.1', 'chr19_KI270914v1_alt', 'GRCh38.p14', 205194);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1114, 'KI270915.1', 'chr19_KI270915v1_alt', 'GRCh38.p14', 170665);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1115, 'KI270916.1', 'chr19_KI270916v1_alt', 'GRCh38.p14', 184516);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1116, 'KI270917.1', 'chr19_KI270917v1_alt', 'GRCh38.p14', 190932);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1117, 'KI270918.1', 'chr19_KI270918v1_alt', 'GRCh38.p14', 123111);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1118, 'KI270919.1', 'chr19_KI270919v1_alt', 'GRCh38.p14', 170701);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1119, 'KI270920.1', 'chr19_KI270920v1_alt', 'GRCh38.p14', 198005);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1120, 'KI270921.1', 'chr19_KI270921v1_alt', 'GRCh38.p14', 282224);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1121, 'KI270922.1', 'chr19_KI270922v1_alt', 'GRCh38.p14', 187935);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1122, 'KI270923.1', 'chr19_KI270923v1_alt', 'GRCh38.p14', 189352);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1123, 'KI270929.1', 'chr19_KI270929v1_alt', 'GRCh38.p14', 186203);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1124, 'KI270930.1', 'chr19_KI270930v1_alt', 'GRCh38.p14', 200773);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1130, 'KI270931.1', 'chr19_KI270931v1_alt', 'GRCh38.p14', 170148);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1131, 'KI270932.1', 'chr19_KI270932v1_alt', 'GRCh38.p14', 215732);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1132, 'KI270933.1', 'chr19_KI270933v1_alt', 'GRCh38.p14', 170537);
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1133, 'GL000209.2', 'chr19_GL000209v2_alt', 'GRCh38.p14', 177381);
-
-
-# --- !Downs
-truncate genbank_contig restart identity cascade;
\ No newline at end of file
diff --git a/conf/evolutions/default/20.sql b/conf/evolutions/default/20.sql
deleted file mode 100644
index 90c50264..00000000
--- a/conf/evolutions/default/20.sql
+++ /dev/null
@@ -1,94 +0,0 @@
-# --- !Ups
-
--- Create new alignment_metadata table for linear references
--- Each row references one sequence_file and one genbank_contig and is removed
--- automatically when either parent row is deleted (ON DELETE CASCADE).
-CREATE TABLE public.alignment_metadata
-(
- id bigserial
- PRIMARY KEY,
- sequence_file_id bigint NOT NULL
- REFERENCES public.sequence_file
- ON DELETE CASCADE,
- genbank_contig_id integer NOT NULL
- REFERENCES public.genbank_contig
- ON DELETE CASCADE,
- -- Only the two literal values 'CONTIG_OVERALL' and 'REGION' are accepted.
- metric_level varchar(50) NOT NULL
- CONSTRAINT alignment_metadata_metric_level_check
- CHECK ((metric_level)::text = ANY
- ((ARRAY ['CONTIG_OVERALL'::character varying, 'REGION'::character varying])::text[])),
- region_name varchar(255),
- region_start_pos bigint,
- region_end_pos bigint,
- -- NOTE(review): region_length_bp is not tied to the start/end positions by any constraint.
- region_length_bp bigint,
- metrics_date timestamp DEFAULT now() NOT NULL,
- analysis_tool varchar(255) NOT NULL,
- analysis_tool_version varchar(50),
- notes text,
- metadata jsonb,
- -- Coordinates must be absent for CONTIG_OVERALL rows. REGION rows need both
- -- coordinates, a start greater than zero, and an end that is not before the start.
- CONSTRAINT valid_region_coordinates
- CHECK (
- (metric_level = 'CONTIG_OVERALL' AND region_start_pos IS NULL AND region_end_pos IS NULL)
- OR
- (metric_level = 'REGION' AND region_start_pos IS NOT NULL AND region_end_pos IS NOT NULL
- AND region_start_pos > 0 AND region_end_pos >= region_start_pos)
- )
-);
-
--- Create new alignment_coverage table
--- The primary key doubles as the foreign key, so each alignment_metadata row
--- can have at most one coverage row, and it is deleted together with its parent.
-CREATE TABLE public.alignment_coverage
-(
- alignment_metadata_id bigint NOT NULL
- PRIMARY KEY
- REFERENCES public.alignment_metadata
- ON DELETE CASCADE,
- mean_depth double precision,
- median_depth double precision,
- percent_coverage_at_1x double precision,
- percent_coverage_at_5x double precision,
- percent_coverage_at_10x double precision,
- percent_coverage_at_20x double precision,
- percent_coverage_at_30x double precision,
- bases_no_coverage bigint,
- bases_low_quality_mapping bigint,
- bases_callable bigint,
- mean_mapping_quality double precision
-);
-
--- Create indices for efficient querying
-CREATE INDEX idx_alignment_metadata_sequence_file
- ON public.alignment_metadata(sequence_file_id);
-
-CREATE INDEX idx_alignment_metadata_genbank_contig
- ON public.alignment_metadata(genbank_contig_id);
-
-CREATE INDEX idx_alignment_metadata_metric_level
- ON public.alignment_metadata(metric_level);
-
--- Partial index: only REGION rows are indexed, which are the only rows that
--- can carry coordinates (see valid_region_coordinates).
-CREATE INDEX idx_alignment_metadata_region
- ON public.alignment_metadata(genbank_contig_id, region_start_pos, region_end_pos)
- WHERE metric_level = 'REGION';
-
--- Remove pangenome_path_id from genbank_contig
--- The values stored in the column are discarded by this statement.
-ALTER TABLE public.genbank_contig
- DROP COLUMN IF EXISTS pangenome_path_id;
-
--- Add comment explaining the migration
-COMMENT ON TABLE public.alignment_metadata IS
- 'Linear reference-based alignment statistics. Replaces pangenome_alignment_metadata.';
-
-COMMENT ON TABLE public.alignment_coverage IS
- 'Coverage statistics for linear reference alignments. Replaces pangenome_alignment_coverage.';
-
-# --- !Downs
-
--- Restore pangenome_path_id to genbank_contig
--- Only the column definition comes back. It is re-added as a plain nullable integer
--- and the values dropped by the Ups script are not restored.
-ALTER TABLE public.genbank_contig
- ADD COLUMN IF NOT EXISTS pangenome_path_id integer;
-
--- Drop the new linear reference tables and their indices
-DROP INDEX IF EXISTS public.idx_alignment_metadata_region;
-DROP INDEX IF EXISTS public.idx_alignment_metadata_metric_level;
-DROP INDEX IF EXISTS public.idx_alignment_metadata_genbank_contig;
-DROP INDEX IF EXISTS public.idx_alignment_metadata_sequence_file;
-
--- alignment_coverage references alignment_metadata, so it is dropped first.
-DROP TABLE IF EXISTS public.alignment_coverage;
-DROP TABLE IF EXISTS public.alignment_metadata;
\ No newline at end of file
diff --git a/conf/evolutions/default/21.sql b/conf/evolutions/default/21.sql
deleted file mode 100644
index 2825b7ff..00000000
--- a/conf/evolutions/default/21.sql
+++ /dev/null
@@ -1,36 +0,0 @@
-# --- !Ups
-
--- Laboratories that operate sequencing instruments. Lab names are unique.
-create table public.sequencing_lab
-(
-    id serial
-        primary key,
-    name varchar(255) not null
-        unique,
-    is_d2c boolean default false not null,
-    website_url varchar(255), -- URL to the lab's official website
-    description_markdown text, -- Rich text description (e.g., accreditation, methods)
-    created_at timestamp not null default now(),
-    updated_at timestamp
-);
-
--- Individual sequencer instruments, each owned by exactly one lab.
-create table public.sequencer_instrument
-(
-    id serial
-        primary key,
-    instrument_id varchar(255) not null
-        unique, -- The ID found in the BAM/CRAM read header (e.g., 'A00123')
-    lab_id integer not null
-        references public.sequencing_lab (id), -- Foreign key to the lab
-    manufacturer varchar(255), -- Optional: e.g., 'Illumina', 'PacBio'
-    model varchar(255), -- Optional: e.g., 'NovaSeq 6000', 'MiSeq'
-    created_at timestamp not null default now(),
-    updated_at timestamp
-);
-
--- No explicit index on instrument_id is created here. The UNIQUE constraint above is
--- already backed by a unique index, so lookups by instrument ID for the API are
--- covered and a second identical index would only add write overhead.
-
-# --- !Downs
-
--- sequencer_instrument references sequencing_lab, so it is dropped first.
-drop table public.sequencer_instrument;
-drop table public.sequencing_lab;
diff --git a/conf/evolutions/default/22.sql b/conf/evolutions/default/22.sql
deleted file mode 100644
index 52ca84a4..00000000
--- a/conf/evolutions/default/22.sql
+++ /dev/null
@@ -1,60 +0,0 @@
-# --- !Ups
--- citizen_biosample: the DID column is renamed to at_uri, then the table gains
--- soft-delete, content id, audit timestamp, accession, alias and haplogroup columns.
-alter table citizen_biosample rename column citizen_biosample_did to at_uri;
-alter table citizen_biosample
-    add column deleted boolean default false not null,
-    add column at_cid varchar(255);
--- The two timestamp columns stay in separate statements, as in the original script.
-alter table citizen_biosample add column created_at timestamp default now() not null;
-alter table citizen_biosample add column updated_at timestamp default now() not null;
-alter table citizen_biosample
-    add column accession varchar(255),
-    add column alias varchar(255),
-    add column y_haplogroup jsonb,
-    add column mt_haplogroup jsonb;
-create unique index citizen_biosample_accession_uindex
-    on citizen_biosample (accession);
-
--- Join table between publications and citizen biosamples.
-create table publication_citizen_biosample
-(
-    publication_id integer references publication (id) on delete cascade,
-    citizen_biosample_id integer references citizen_biosample (id) on delete cascade,
-    primary key (publication_id, citizen_biosample_id)
-);
-
--- Haplogroup results for a citizen biosample, at most one row per publication.
-create table citizen_biosample_original_haplogroup
-(
-    id serial primary key,
-    citizen_biosample_id integer references citizen_biosample (id) on delete cascade,
-    publication_id integer references publication (id) on delete cascade,
-    y_haplogroup_result jsonb,
-    mt_haplogroup_result jsonb,
-    notes text,
-    unique (citizen_biosample_id, publication_id)
-);
-
--- Projects, identified by a unique GUID and optionally by a unique at_uri.
-create table project
-(
-    id serial primary key,
-    project_guid uuid not null unique,
-    name varchar(255) not null,
-    description text,
-    owner_did varchar(255) not null,
-    created_at timestamp not null default now(),
-    updated_at timestamp not null default now(),
-    deleted boolean default false not null,
-    at_uri varchar(255),
-    at_cid varchar(255)
-);
-
-create unique index project_at_uri_uindex
-    on project (at_uri);
-
-# --- !Downs
--- Undo everything above in reverse order.
-drop table project;
-drop table citizen_biosample_original_haplogroup;
-drop table publication_citizen_biosample;
-drop index citizen_biosample_accession_uindex;
-alter table citizen_biosample
-    drop column mt_haplogroup,
-    drop column y_haplogroup,
-    drop column alias,
-    drop column accession,
-    drop column updated_at,
-    drop column created_at,
-    drop column at_cid,
-    drop column deleted;
-alter table citizen_biosample rename column at_uri to citizen_biosample_did;
diff --git a/conf/evolutions/default/23.sql b/conf/evolutions/default/23.sql
deleted file mode 100644
index 7940b504..00000000
--- a/conf/evolutions/default/23.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-# --- !Ups
--- Add JSONB haplogroup result columns and rename the donor DID column to at_uri.
-alter table biosample_original_haplogroup
-    add column y_haplogroup_result jsonb,
-    add column mt_haplogroup_result jsonb;
-alter table specimen_donor rename column citizen_biosample_did to at_uri;
-
-# --- !Downs
--- Restore the old column name, then remove the two result columns.
-alter table specimen_donor rename column at_uri to citizen_biosample_did;
-alter table biosample_original_haplogroup
-    drop column mt_haplogroup_result,
-    drop column y_haplogroup_result;
diff --git a/conf/evolutions/default/24.sql b/conf/evolutions/default/24.sql
deleted file mode 100644
index c1239be0..00000000
--- a/conf/evolutions/default/24.sql
+++ /dev/null
@@ -1,7 +0,0 @@
-# --- !Ups
--- Optional link from a citizen biosample to its specimen donor, indexed for lookups.
-alter table citizen_biosample
-    add column specimen_donor_id integer
-        references specimen_donor (id);
-create index citizen_biosample_specimen_donor_id_idx
-    on citizen_biosample (specimen_donor_id);
-
-# --- !Downs
--- Remove the index first, then the column it covers.
-drop index citizen_biosample_specimen_donor_id_idx;
-alter table citizen_biosample
-    drop column specimen_donor_id;
diff --git a/conf/evolutions/default/25.sql b/conf/evolutions/default/25.sql
deleted file mode 100644
index 38213a72..00000000
--- a/conf/evolutions/default/25.sql
+++ /dev/null
@@ -1,37 +0,0 @@
-# --- !Ups
-
--- sequence_library gains at_uri / at_cid, with an index on at_uri.
-alter table sequence_library
-    add column at_uri varchar(255),
-    add column at_cid varchar(255);
-create index idx_sequence_library_at_uri
-    on sequence_library (at_uri);
-
--- alignment_metadata gains reference build, variant caller and summary coverage columns.
-alter table alignment_metadata
-    add column reference_build varchar(255),
-    add column variant_caller varchar(255),
-    add column genome_territory bigint,
-    add column mean_coverage double precision,
-    add column median_coverage double precision,
-    add column sd_coverage double precision,
-    add column pct_exc_dupe double precision,
-    add column pct_exc_mapq double precision,
-    add column pct_10x double precision,
-    add column pct_20x double precision,
-    add column pct_30x double precision,
-    add column het_snp_sensitivity double precision;
-
-# --- !Downs
-
--- Remove the alignment_metadata columns in the reverse of the order they were added.
-alter table alignment_metadata
-    drop column het_snp_sensitivity,
-    drop column pct_30x,
-    drop column pct_20x,
-    drop column pct_10x,
-    drop column pct_exc_mapq,
-    drop column pct_exc_dupe,
-    drop column sd_coverage,
-    drop column median_coverage,
-    drop column mean_coverage,
-    drop column genome_territory,
-    drop column variant_caller,
-    drop column reference_build;
-
-drop index idx_sequence_library_at_uri;
-alter table sequence_library
-    drop column at_cid,
-    drop column at_uri;
diff --git a/conf/evolutions/default/26.sql b/conf/evolutions/default/26.sql
deleted file mode 100644
index 7ba0986f..00000000
--- a/conf/evolutions/default/26.sql
+++ /dev/null
@@ -1,110 +0,0 @@
-# --- !Ups
-CREATE SCHEMA IF NOT EXISTS social;
-
--- 1. Move existing reputation tables to social schema
--- ALTER TABLE ... SET SCHEMA relocates a table in place. Per the PostgreSQL documentation
--- its indexes, constraints and owned sequences move with it, so no data is copied.
--- NOTE(review): confirm that the foreign keys declared in 6.sql still resolve after the move.
--- The existing FKs in 6.sql were:
--- fk_reputation_events_user_id references public.users
--- fk_reputation_events_event_type_id references public.reputation_event_types
--- fk_reputation_events_source_user_id references public.users
--- fk_user_reputation_scores_user_id references public.users
-
-ALTER TABLE public.reputation_event_types SET SCHEMA social;
-ALTER TABLE public.reputation_events SET SCHEMA social;
-ALTER TABLE public.user_reputation_scores SET SCHEMA social;
-
--- 2. Create new social tables
-
--- User Relationships (Foes/Blocks)
--- One row per (blocker, blocked) pair. The DIDs are plain strings with no foreign key.
-CREATE TABLE social.user_blocks (
- blocker_did VARCHAR(255) NOT NULL,
- blocked_did VARCHAR(255) NOT NULL,
- reason TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- PRIMARY KEY (blocker_did, blocked_did)
-);
-
--- NOTE(review): the primary key index already leads with blocker_did, so
--- idx_user_blocks_blocker may be redundant. Left unchanged here.
-CREATE INDEX idx_user_blocks_blocker ON social.user_blocks(blocker_did);
-CREATE INDEX idx_user_blocks_blocked ON social.user_blocks(blocked_did);
-
--- Conversations (Threads)
--- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13. Older servers
--- need the pgcrypto extension - confirm the target version.
-CREATE TABLE social.conversations (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- type VARCHAR(50) NOT NULL, -- 'DIRECT', 'GROUP', 'SYSTEM', 'RECRUITMENT'
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Conversation Participants
--- Rows are removed automatically when their conversation is deleted.
-CREATE TABLE social.conversation_participants (
- conversation_id UUID NOT NULL,
- user_did VARCHAR(255) NOT NULL,
- role VARCHAR(50) DEFAULT 'MEMBER', -- 'ADMIN', 'MEMBER'
- last_read_at TIMESTAMP,
- joined_at TIMESTAMP NOT NULL DEFAULT NOW(),
- PRIMARY KEY (conversation_id, user_did),
- CONSTRAINT fk_conversation_participants_conversation_id FOREIGN KEY (conversation_id) REFERENCES social.conversations(id) ON DELETE CASCADE
-);
-
--- Messages
--- Rows are removed automatically when their conversation is deleted.
-CREATE TABLE social.messages (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- conversation_id UUID NOT NULL,
- sender_did VARCHAR(255) NOT NULL, -- User DID or 'SYSTEM'
- content TEXT NOT NULL,
- content_type VARCHAR(50) DEFAULT 'TEXT', -- 'TEXT', 'MARKDOWN', 'JSON_PAYLOAD'
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- is_edited BOOLEAN DEFAULT FALSE,
- CONSTRAINT fk_messages_conversation_id FOREIGN KEY (conversation_id) REFERENCES social.conversations(id) ON DELETE CASCADE
-);
-
-CREATE INDEX idx_messages_conversation_id ON social.messages(conversation_id);
-CREATE INDEX idx_messages_sender_did ON social.messages(sender_did);
-
--- Feed Posts (Public/Community)
--- parent_post_id and root_post_id are self-references without ON DELETE CASCADE, so a
--- post that still has replies cannot be deleted until those replies are removed.
-CREATE TABLE social.feed_posts (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- author_did VARCHAR(255) NOT NULL,
- content TEXT NOT NULL,
- parent_post_id UUID, -- For replies
- root_post_id UUID, -- Thread context
- topic VARCHAR(100), -- 'GENERAL', 'HAPLOGROUP_R', etc.
- author_reputation_score INT DEFAULT 0, -- Snapshot at time of posting
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_feed_posts_parent_id FOREIGN KEY (parent_post_id) REFERENCES social.feed_posts(id),
- CONSTRAINT fk_feed_posts_root_id FOREIGN KEY (root_post_id) REFERENCES social.feed_posts(id)
-);
-
-CREATE INDEX idx_feed_posts_author_did ON social.feed_posts(author_did);
-CREATE INDEX idx_feed_posts_topic ON social.feed_posts(topic);
-CREATE INDEX idx_feed_posts_created_at ON social.feed_posts(created_at);
-
--- 3. Seed initial reputation event types
--- ON CONFLICT (name) DO NOTHING leaves any existing row with the same name untouched,
--- which makes the seed safe to re-run.
-INSERT INTO social.reputation_event_types (name, description, default_points_change, is_positive, is_system_generated) VALUES
-('ACCOUNT_VERIFIED', 'Email and identity verification complete', 10, TRUE, TRUE),
-('LAB_OBSERVATION_ACCEPTED', 'Submitted sequencer metadata verified by consensus', 5, TRUE, TRUE),
-('FEED_POST_UPVOTED', 'Community member upvoted a post', 1, TRUE, FALSE),
-('FEED_POST_DOWNVOTED', 'Community member downvoted a post', -1, FALSE, FALSE),
-('SPAM_REPORT_VALIDATED', 'Content marked as spam by moderator or consensus', -50, FALSE, TRUE),
-('RECRUITMENT_ACCEPTED', 'User accepted a recruitment request', 2, TRUE, FALSE),
-('NEW_USER_BONUS', 'Welcome bonus for new users', 5, TRUE, TRUE)
-ON CONFLICT (name) DO NOTHING;
-
-# --- !Downs
-
--- Tables that reference social.conversations are dropped before it.
--- The event types seeded above are not deleted by this script.
-DROP TABLE social.feed_posts;
-DROP TABLE social.messages;
-DROP TABLE social.conversation_participants;
-DROP TABLE social.conversations;
-DROP TABLE social.user_blocks;
-
--- Move tables back to public
-ALTER TABLE social.user_reputation_scores SET SCHEMA public;
-ALTER TABLE social.reputation_events SET SCHEMA public;
-ALTER TABLE social.reputation_event_types SET SCHEMA public;
-
--- No CASCADE: the statement fails if anything is still left in the schema.
-DROP SCHEMA social;
diff --git a/conf/evolutions/default/27.sql b/conf/evolutions/default/27.sql
deleted file mode 100644
index 2fa98fae..00000000
--- a/conf/evolutions/default/27.sql
+++ /dev/null
@@ -1,106 +0,0 @@
--- !Ups
-
--- Fold the three per-file side tables into JSONB columns on sequence_file.
-alter table sequence_file
-    add column checksums jsonb default '[]'::jsonb,
-    add column http_locations jsonb default '[]'::jsonb,
-    add column atp_location jsonb;
-
--- Copy the existing rows across. Each side-table row becomes a JSON object without its
--- sequence_file_id key. Files without checksums or HTTP locations get an empty array.
--- NOTE(review): the atp_location subquery is scalar, so a file with more than one
--- sequence_atp_location row would make this statement fail - confirm that cannot happen.
-update sequence_file sf
-set checksums = (
-        select coalesce(jsonb_agg(to_jsonb(sfc) - 'sequence_file_id'), '[]'::jsonb)
-        from sequence_file_checksum sfc
-        where sfc.sequence_file_id = sf.id
-    ),
-    http_locations = (
-        select coalesce(jsonb_agg(to_jsonb(shl) - 'sequence_file_id'), '[]'::jsonb)
-        from sequence_http_location shl
-        where shl.sequence_file_id = sf.id
-    ),
-    atp_location = (
-        select to_jsonb(sal) - 'sequence_file_id'
-        from sequence_atp_location sal
-        where sal.sequence_file_id = sf.id
-    );
-
--- GIN indexes using the jsonb_path_ops operator class on the new columns.
-create index idx_sf_checksums on sequence_file using gin (checksums jsonb_path_ops);
-create index idx_sf_http_locations on sequence_file using gin (http_locations jsonb_path_ops);
-create index idx_sf_atp_location on sequence_file using gin (atp_location jsonb_path_ops);
-
--- The side tables are no longer needed once their content lives in sequence_file.
-drop table sequence_file_checksum;
-drop table sequence_http_location;
-drop table sequence_atp_location;
-
--- !Downs
-
--- Recreate the three side tables that the Ups script folded into sequence_file.
--- Every table cascades on deletion of its parent sequence_file row.
-
--- One checksum per (file, algorithm).
-create table sequence_file_checksum (
-    id bigserial primary key,
-    sequence_file_id int not null,
-    checksum varchar(255) not null,
-    algorithm varchar(50) not null,
-    verified_at timestamp with time zone,
-    created_at timestamp with time zone not null default now(),
-    updated_at timestamp with time zone not null default now(),
-    foreign key (sequence_file_id) references sequence_file (id) on delete cascade,
-    unique (sequence_file_id, algorithm)
-);
-
--- One HTTP location per (file, url_hash).
-create table sequence_http_location (
-    id bigserial primary key,
-    sequence_file_id int not null,
-    url varchar(2048) not null,
-    url_hash varchar(64) not null,
-    created_at timestamp with time zone not null default now(),
-    updated_at timestamp with time zone not null default now(),
-    foreign key (sequence_file_id) references sequence_file (id) on delete cascade,
-    unique (sequence_file_id, url_hash)
-);
-
--- One ATP location per (file, record_uri).
-create table sequence_atp_location (
-    id bigserial primary key,
-    sequence_file_id int not null,
-    repo_did varchar(255) not null,
-    record_uri varchar(255) not null,
-    cid varchar(255) not null,
-    created_at timestamp with time zone not null default now(),
-    updated_at timestamp with time zone not null default now(),
-    foreign key (sequence_file_id) references sequence_file (id) on delete cascade,
-    unique (sequence_file_id, record_uri)
-);
-
--- Re-populate sequence_file_checksum from sequence_file.checksums.
--- The array is expanded once per file through a LATERAL join, so all columns of an inserted
--- row come from the same element and any number of checksums per file is handled.
--- A NULL or empty array yields no rows, so no length filter is needed.
--- created_at / updated_at are NOT NULL in the target table. NOTE(review): elements that
--- lack those keys (for example ones written after the Ups migration) fall back to NOW()
--- instead of aborting the rollback - confirm this is the desired behavior.
--- The original row ids are not part of the restore, new ids are generated.
-INSERT INTO sequence_file_checksum (sequence_file_id, checksum, algorithm, verified_at, created_at, updated_at)
-SELECT
-    sf.id,
-    c.elem->>'checksum',
-    c.elem->>'algorithm',
-    (c.elem->>'verified_at')::TIMESTAMP WITH TIME ZONE,
-    COALESCE((c.elem->>'created_at')::TIMESTAMP WITH TIME ZONE, NOW()),
-    COALESCE((c.elem->>'updated_at')::TIMESTAMP WITH TIME ZONE, NOW())
-FROM sequence_file sf
-CROSS JOIN LATERAL jsonb_array_elements(sf.checksums) AS c(elem);
-
-
--- Re-populate sequence_http_location from sequence_file.http_locations, same approach.
-INSERT INTO sequence_http_location (sequence_file_id, url, url_hash, created_at, updated_at)
-SELECT
-    sf.id,
-    l.elem->>'url',
-    l.elem->>'url_hash',
-    COALESCE((l.elem->>'created_at')::TIMESTAMP WITH TIME ZONE, NOW()),
-    COALESCE((l.elem->>'updated_at')::TIMESTAMP WITH TIME ZONE, NOW())
-FROM sequence_file sf
-CROSS JOIN LATERAL jsonb_array_elements(sf.http_locations) AS l(elem);
-
--- Re-populate sequence_atp_location from sequence_file.atp_location.
--- atp_location is a single JSON object rather than an array, so no expansion is needed.
-INSERT INTO sequence_atp_location (sequence_file_id, repo_did, record_uri, cid, created_at, updated_at)
-SELECT
-    sf.id,
-    sf.atp_location->>'repo_did',
-    sf.atp_location->>'record_uri',
-    sf.atp_location->>'cid',
-    COALESCE((sf.atp_location->>'created_at')::TIMESTAMP WITH TIME ZONE, NOW()),
-    COALESCE((sf.atp_location->>'updated_at')::TIMESTAMP WITH TIME ZONE, NOW())
-FROM sequence_file sf
-WHERE sf.atp_location IS NOT NULL;
-
-
--- The JSONB columns (and the GIN indexes built on them) go away once the data is copied back.
-ALTER TABLE sequence_file DROP COLUMN checksums;
-ALTER TABLE sequence_file DROP COLUMN http_locations;
-ALTER TABLE sequence_file DROP COLUMN atp_location;
diff --git a/conf/evolutions/default/28.sql b/conf/evolutions/default/28.sql
deleted file mode 100644
index d0ded6b9..00000000
--- a/conf/evolutions/default/28.sql
+++ /dev/null
@@ -1,23 +0,0 @@
--- !Ups
-
--- Step 1: make sure the 'tree' schema exists.
-create schema if not exists tree;
-
--- Step 2: relocate the haplogroup tables from 'public' into 'tree'.
-alter table public.haplogroup set schema tree;
-alter table public.haplogroup_relationship set schema tree;
-alter table public.haplogroup_variant set schema tree;
-alter table public.haplogroup_variant_metadata set schema tree;
-alter table public.relationship_revision_metadata set schema tree;
-
--- !Downs
-
--- Step 1: move the haplogroup tables back into 'public'.
-alter table tree.haplogroup set schema public;
-alter table tree.haplogroup_relationship set schema public;
-alter table tree.haplogroup_variant set schema public;
-alter table tree.haplogroup_variant_metadata set schema public;
-alter table tree.relationship_revision_metadata set schema public;
-
--- Step 2: remove the 'tree' schema. CASCADE also drops anything still left inside it.
-drop schema if exists tree cascade;
diff --git a/conf/evolutions/default/29.sql b/conf/evolutions/default/29.sql
deleted file mode 100644
index 7f8e9db1..00000000
--- a/conf/evolutions/default/29.sql
+++ /dev/null
@@ -1,63 +0,0 @@
--- !Ups
-
--- Enumerated types used by test_type_definition. They must exist before the table.
-create type data_generation_method as enum ('SEQUENCING', 'GENOTYPING');
-create type target_type as enum ('WHOLE_GENOME', 'Y_CHROMOSOME', 'MT_DNA', 'AUTOSOMAL', 'X_CHROMOSOME', 'MIXED');
-
--- Catalogue of supported test types.
--- Each column maps to the camelCase field of the same name on TestTypeRow
--- (for example display_name maps to TestTypeRow.displayName).
-create table test_type_definition (
-    id serial primary key,
-    code varchar(50) not null unique,
-    display_name varchar(100) not null,
-    category data_generation_method not null,
-    vendor varchar(100),
-    target_type target_type not null,
-    expected_min_depth double precision,
-    expected_target_depth double precision,
-    expected_marker_count integer,
-    supports_haplogroup_y boolean not null default false,
-    supports_haplogroup_mt boolean not null default false,
-    supports_autosomal_ibd boolean not null default false,
-    supports_ancestry boolean not null default false,
-    typical_file_formats text[],
-    version varchar(20),
-    release_date date,
-    deprecated_at date,
-    -- Optional pointer to the test type that replaces this one.
-    successor_test_type_id integer references test_type_definition (id),
-    description text,
-    documentation_url varchar(500)
-);
-
--- Seed the known test types. Value order per row:
--- code, display name, category, vendor, target, target depth,
--- Y / MT / IBD / ancestry support flags, file formats, description.
-insert into test_type_definition (
-    code, display_name, category, vendor, target_type, expected_target_depth,
-    supports_haplogroup_y, supports_haplogroup_mt, supports_autosomal_ibd, supports_ancestry,
-    typical_file_formats, description
-)
-values
-    ('WGS', 'Whole Genome Sequencing', 'SEQUENCING', NULL, 'WHOLE_GENOME', 30.0, TRUE, TRUE, TRUE, TRUE, ARRAY['BAM', 'CRAM', 'VCF'], 'Standard whole genome sequencing.'),
-    ('WES', 'Whole Exome Sequencing', 'SEQUENCING', NULL, 'AUTOSOMAL', 100.0, FALSE, FALSE, FALSE, FALSE, ARRAY['BAM', 'VCF'], 'Whole exome sequencing.'),
-    ('TARGETED_Y', 'Targeted Y-DNA Sequencing', 'SEQUENCING', NULL, 'Y_CHROMOSOME', 50.0, TRUE, FALSE, FALSE, FALSE, ARRAY['BAM', 'VCF', 'BED'], 'Targeted sequencing of Y-chromosome.'),
-    ('TARGETED_MT', 'Targeted mtDNA Sequencing', 'SEQUENCING', NULL, 'MT_DNA', 1000.0, FALSE, TRUE, FALSE, FALSE, ARRAY['BAM', 'FASTA', 'VCF'], 'Targeted sequencing of mitochondrial DNA.'),
-    ('SNP_ARRAY_23ANDME', '23andMe v5 Chip', 'GENOTYPING', '23andMe', 'MIXED', NULL, TRUE, TRUE, TRUE, TRUE, ARRAY['TXT', 'CSV'], 'SNP Array data from 23andMe v5.'),
-    ('SNP_ARRAY_ANCESTRY', 'AncestryDNA v2', 'GENOTYPING', 'AncestryDNA', 'MIXED', NULL, TRUE, TRUE, TRUE, TRUE, ARRAY['TXT', 'CSV'], 'SNP Array data from AncestryDNA v2.'),
-    ('ARRAY_FTDNA_FF', 'FTDNA Family Finder', 'GENOTYPING', 'FamilyTreeDNA', 'AUTOSOMAL', NULL, FALSE, FALSE, TRUE, TRUE, ARRAY['CSV'], 'FTDNA Family Finder autosomal chip data.');
-
--- !Downs
-
--- Drop the table (CASCADE also removes objects that depend on it), then the enum types.
-drop table if exists test_type_definition cascade;
-drop type if exists data_generation_method;
-drop type if exists target_type;
\ No newline at end of file
diff --git a/conf/evolutions/default/3.sql b/conf/evolutions/default/3.sql
deleted file mode 100644
index 229b55f6..00000000
--- a/conf/evolutions/default/3.sql
+++ /dev/null
@@ -1,10 +0,0 @@
-# --- !Ups
--- Publications gain optional authors and abstract summary columns.
-
-alter table publication
-    add column authors varchar(1000) null,
-    add column abstract_summary text null;
-
-# --- !Downs
-
--- Remove the two columns again, newest first.
-alter table publication
-    drop column abstract_summary,
-    drop column authors;
\ No newline at end of file
diff --git a/conf/evolutions/default/30.sql b/conf/evolutions/default/30.sql
deleted file mode 100644
index d7af5b04..00000000
--- a/conf/evolutions/default/30.sql
+++ /dev/null
@@ -1,40 +0,0 @@
--- !Ups
-
--- Replace the free-text sequence_library.test_type with a foreign key to test_type_definition.
-
--- Step 1: new nullable column that will hold the reference.
-alter table sequence_library
-    add column test_type_id integer;
-
--- Step 2: resolve each library's old string, upper-cased, against test_type_definition.code.
-update sequence_library sl
-   set test_type_id = ttd.id
-  from test_type_definition ttd
- where upper(sl.test_type) = ttd.code;
-
--- Step 3: enforce the reference. RESTRICT blocks deletion of a test type that is still in use.
-alter table sequence_library
-    add constraint fk_sequence_library_test_type
-        foreign key (test_type_id) references test_type_definition (id) on delete restrict;
-
--- Step 4: require a value on every row.
--- This statement fails if any row was left unmatched by step 2, so the script
--- assumes every existing test_type value has an entry in test_type_definition.
-alter table sequence_library
-    alter column test_type_id set not null;
-
--- Step 5: remove the old string column.
-alter table sequence_library
-    drop column test_type;
-
--- !Downs
-
--- Step 1: bring back the string column (nullable).
-alter table sequence_library
-    add column test_type varchar(255);
-
--- Step 2: fill it with the code of the referenced test type.
-update sequence_library sl
-   set test_type = ttd.code
-  from test_type_definition ttd
- where sl.test_type_id = ttd.id;
-
--- Step 3: drop the foreign key constraint.
-alter table sequence_library
-    drop constraint if exists fk_sequence_library_test_type;
-
--- Step 4: drop the reference column.
-alter table sequence_library
-    drop column test_type_id;
diff --git a/conf/evolutions/default/31.sql b/conf/evolutions/default/31.sql
deleted file mode 100644
index 02b6b29e..00000000
--- a/conf/evolutions/default/31.sql
+++ /dev/null
@@ -1,55 +0,0 @@
--- !Ups
-
--- Publications discovered through OpenAlex that await review.
--- openalex_id is unique, and its UNIQUE constraint also serves lookups by that id.
-CREATE TABLE publication_candidates (
-    id SERIAL PRIMARY KEY,
-    openalex_id VARCHAR(255) UNIQUE NOT NULL,
-    doi VARCHAR(255),
-    title TEXT NOT NULL,
-    abstract TEXT,
-    publication_date DATE,
-    journal_name VARCHAR(500),
-    relevance_score DOUBLE PRECISION,
-    discovery_date TIMESTAMP DEFAULT NOW(),
-    status VARCHAR(50) DEFAULT 'pending', -- pending, accepted, rejected, deferred
-    reviewed_by UUID,
-    reviewed_at TIMESTAMP,
-    rejection_reason TEXT,
-    raw_metadata JSONB, -- Full OpenAlex response
-    -- Deleting the reviewing user keeps the candidate and clears reviewed_by.
-    FOREIGN KEY (reviewed_by) REFERENCES public.users(id) ON DELETE SET NULL
-);
-
--- Saved search definitions that are run against OpenAlex.
-CREATE TABLE publication_search_configs (
-    id SERIAL PRIMARY KEY,
-    name VARCHAR(255) NOT NULL,
-    search_query TEXT NOT NULL, -- OpenAlex query string
-    concepts JSONB, -- OpenAlex concept IDs to filter
-    journals JSONB, -- Journal/source filters
-    enabled BOOLEAN DEFAULT TRUE,
-    last_run TIMESTAMP,
-    created_at TIMESTAMP DEFAULT NOW()
-);
-
--- One row per execution of a search config, removed together with its config.
-CREATE TABLE publication_search_runs (
-    id SERIAL PRIMARY KEY,
-    config_id INT REFERENCES publication_search_configs(id) ON DELETE CASCADE,
-    run_at TIMESTAMP DEFAULT NOW(),
-    candidates_found INT,
-    new_candidates INT, -- After deduplication
-    query_used TEXT,
-    duration_ms INT
-);
-
-CREATE INDEX idx_pub_candidates_status ON publication_candidates(status);
--- Partial index: pending candidates ordered by descending relevance.
-CREATE INDEX idx_pub_candidates_relevance ON publication_candidates(relevance_score DESC) WHERE status = 'pending';
--- No separate index on openalex_id: the UNIQUE constraint on that column is already
--- backed by a unique index, so an extra plain index would only duplicate it.
-CREATE INDEX idx_pub_candidates_doi ON publication_candidates(doi);
-
--- Insert default search config
-INSERT INTO publication_search_configs (name, search_query, enabled)
-VALUES ('Y-DNA Haplogroup Discovery', 'Y-DNA haplogroup', TRUE);
-
--- !Downs
-
--- publication_search_runs references publication_search_configs, so it is dropped first.
-DROP TABLE IF EXISTS publication_search_runs;
-DROP TABLE IF EXISTS publication_search_configs;
-DROP TABLE IF EXISTS publication_candidates;
diff --git a/conf/evolutions/default/32.sql b/conf/evolutions/default/32.sql
deleted file mode 100644
index 3b1eba1e..00000000
--- a/conf/evolutions/default/32.sql
+++ /dev/null
@@ -1,14 +0,0 @@
--- !Ups
-
--- Seed three additional enabled search configs, each filtered by two OpenAlex concept URLs.
-insert into publication_search_configs (name, search_query, concepts, enabled)
-values
-    (
-        'Forensic Anthropology and Bioarchaeology Studies',
-        'forensic anthropology bioarchaeology',
-        '["https://api.openalex.org/concepts/wikidata:Q28065", "https://api.openalex.org/concepts/wikidata:Q13404081"]'::jsonb,
-        TRUE
-    ),
-    (
-        'Archaeology and Ancient Environmental Studies',
-        'archaeology ancient environmental',
-        '["https://api.openalex.org/concepts/wikidata:Q23498", "https://api.openalex.org/concepts/wikidata:Q1561862"]'::jsonb,
-        TRUE
-    ),
-    (
-        'Forensic and Genetic Research',
-        'forensic genetic research',
-        '["https://api.openalex.org/concepts/wikidata:Q495304", "https://api.openalex.org/concepts/wikidata:Q69953209"]'::jsonb,
-        TRUE
-    );
-
--- !Downs
-
--- Remove the seeded configs by name.
-delete from publication_search_configs
-where name in (
-    'Forensic Anthropology and Bioarchaeology Studies',
-    'Archaeology and Ancient Environmental Studies',
-    'Forensic and Genetic Research'
-);
diff --git a/conf/evolutions/default/33.sql b/conf/evolutions/default/33.sql
deleted file mode 100644
index b604aedb..00000000
--- a/conf/evolutions/default/33.sql
+++ /dev/null
@@ -1,12 +0,0 @@
--- !Ups
-
--- Seed the Admin and Curator roles. A role whose name already exists is left untouched.
-insert into auth.roles (id, name, description, created_at, updated_at)
-values
-    (gen_random_uuid(), 'Admin', 'Administrator with full access', now(), now()),
-    (gen_random_uuid(), 'Curator', 'Curator access for managing content', now(), now())
-on conflict (name) do nothing;
-
--- !Downs
-
--- Remove both roles by name.
-delete from auth.roles
-where name in ('Admin', 'Curator');
diff --git a/conf/evolutions/default/34.sql b/conf/evolutions/default/34.sql
deleted file mode 100644
index 2e6dc258..00000000
--- a/conf/evolutions/default/34.sql
+++ /dev/null
@@ -1,7 +0,0 @@
--- !Ups
-
--- Rename users.email to email_hash. Only the column name changes, stored values are untouched.
-alter table public.users
-    rename column email to email_hash;
-
--- !Downs
-
--- Restore the original column name.
-alter table public.users
-    rename column email_hash to email;
diff --git a/conf/evolutions/default/35.sql b/conf/evolutions/default/35.sql
deleted file mode 100644
index a3efff90..00000000
--- a/conf/evolutions/default/35.sql
+++ /dev/null
@@ -1,7 +0,0 @@
--- !Ups
-
-ALTER TABLE public.users RENAME COLUMN email_hash TO email_encrypted;
-
--- !Downs
-
-ALTER TABLE public.users RENAME COLUMN email_encrypted TO email_hash;
diff --git a/conf/evolutions/default/36.sql b/conf/evolutions/default/36.sql
deleted file mode 100644
index b268dd8f..00000000
--- a/conf/evolutions/default/36.sql
+++ /dev/null
@@ -1,17 +0,0 @@
--- !Ups
-
--- Insert permission
-INSERT INTO auth.permissions (id, name, description, created_at, updated_at)
-VALUES (gen_random_uuid(), 'view_publication_candidates', 'View and manage publication candidates', NOW(), NOW())
-ON CONFLICT (name) DO NOTHING;
-
--- Assign permission to Admin and Curator roles
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id
-FROM auth.roles r, auth.permissions p
-WHERE r.name IN ('Admin', 'Curator') AND p.name = 'view_publication_candidates'
-ON CONFLICT DO NOTHING;
-
--- !Downs
-
-DELETE FROM auth.permissions WHERE name = 'view_publication_candidates';
diff --git a/conf/evolutions/default/37.sql b/conf/evolutions/default/37.sql
deleted file mode 100644
index 4377d5fd..00000000
--- a/conf/evolutions/default/37.sql
+++ /dev/null
@@ -1,17 +0,0 @@
--- !Ups
-
--- Add haplogroup reconciliation references to specimen_donor
--- Reconciliation is at the donor level since a donor may have multiple biosamples/runs
--- These link to HaplogroupReconciliation records for multi-run consensus
-ALTER TABLE specimen_donor
- ADD COLUMN y_dna_reconciliation_ref VARCHAR,
- ADD COLUMN mt_dna_reconciliation_ref VARCHAR;
-
-COMMENT ON COLUMN specimen_donor.y_dna_reconciliation_ref IS 'AT URI reference to Y-DNA haplogroup reconciliation record';
-COMMENT ON COLUMN specimen_donor.mt_dna_reconciliation_ref IS 'AT URI reference to MT-DNA haplogroup reconciliation record';
-
--- !Downs
-
-ALTER TABLE specimen_donor
- DROP COLUMN IF EXISTS y_dna_reconciliation_ref,
- DROP COLUMN IF EXISTS mt_dna_reconciliation_ref;
diff --git a/conf/evolutions/default/38.sql b/conf/evolutions/default/38.sql
deleted file mode 100644
index 3aadb991..00000000
--- a/conf/evolutions/default/38.sql
+++ /dev/null
@@ -1,81 +0,0 @@
--- !Ups
-
--- Population breakdown table for ancestry analysis results
--- Stores ADMIXTURE-style ancestry breakdowns at sub-continental granularity
-CREATE TABLE population_breakdown (
- id SERIAL PRIMARY KEY,
- at_uri VARCHAR UNIQUE,
- at_cid VARCHAR,
- sample_guid UUID NOT NULL,
- analysis_method VARCHAR NOT NULL, -- PCA_PROJECTION_GMM, ADMIXTURE, FASTSTRUCTURE, etc.
- panel_type VARCHAR, -- 'aims' (~5k SNPs) or 'genome-wide' (~500k SNPs)
- reference_populations VARCHAR, -- '1000G_HGDP_v1', '1000G', 'HGDP', etc.
- snps_analyzed INT, -- Total SNPs in the analysis panel
- snps_with_genotype INT, -- SNPs with valid genotype calls
- snps_missing INT, -- SNPs with no call or missing data
- confidence_level DOUBLE PRECISION, -- Overall confidence 0.0-1.0
- pca_coordinates JSONB, -- First 3 PCA coordinates [x, y, z]
- analysis_date TIMESTAMP,
- pipeline_version VARCHAR,
- reference_version VARCHAR,
- deleted BOOLEAN DEFAULT FALSE,
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW()
-);
-
-CREATE INDEX idx_population_breakdown_sample_guid ON population_breakdown(sample_guid);
-CREATE INDEX idx_population_breakdown_at_uri ON population_breakdown(at_uri) WHERE at_uri IS NOT NULL;
-
-COMMENT ON TABLE population_breakdown IS 'Ancestry composition analysis results using PCA projection onto 1000G + HGDP reference populations';
-
--- Population components (sub-continental level, ~33 populations)
-CREATE TABLE population_component (
- id SERIAL PRIMARY KEY,
- population_breakdown_id INT NOT NULL REFERENCES population_breakdown(id) ON DELETE CASCADE,
- population_code VARCHAR NOT NULL, -- CEU, YRI, CHB, GIH, etc.
- population_name VARCHAR, -- Northwestern European, Yoruba, Han Chinese, etc.
- super_population VARCHAR, -- European, African, East Asian, South Asian, etc.
- percentage DOUBLE PRECISION NOT NULL, -- 0.0-100.0
- confidence_lower DOUBLE PRECISION, -- 95% CI lower bound
- confidence_upper DOUBLE PRECISION, -- 95% CI upper bound
- rank INT -- Display rank by percentage (1 = highest)
-);
-
-CREATE INDEX idx_population_component_breakdown ON population_component(population_breakdown_id);
-
-COMMENT ON TABLE population_component IS 'Individual population components in an ancestry breakdown (~33 reference populations)';
-
--- Super-population summary (continental level, 9 super-populations)
-CREATE TABLE super_population_summary (
- id SERIAL PRIMARY KEY,
- population_breakdown_id INT NOT NULL REFERENCES population_breakdown(id) ON DELETE CASCADE,
- super_population VARCHAR NOT NULL, -- European, African, East Asian, etc.
- percentage DOUBLE PRECISION NOT NULL, -- Combined percentage 0.0-100.0
- populations JSONB -- Array of contributing population codes
-);
-
-CREATE INDEX idx_super_population_breakdown ON super_population_summary(population_breakdown_id);
-
-COMMENT ON TABLE super_population_summary IS 'Aggregated ancestry at continental level (9 super-populations)';
-
--- Seed the population lookup table with all reference population codes
--- Rows that already exist are skipped (ON CONFLICT DO NOTHING), so only missing codes are added
-INSERT INTO population (population_name)
-SELECT unnest(ARRAY[
- 'CEU', 'FIN', 'GBR', 'IBS', 'TSI', -- European
- 'YRI', 'LWK', 'ESN', 'MSL', 'GWD', -- African
- 'CHB', 'JPT', 'KHV', 'CHS', 'CDX', -- East Asian
- 'GIH', 'PJL', 'BEB', 'STU', 'ITU', -- South Asian
- 'MXL', 'PUR', 'PEL', 'CLM', -- Americas
- 'Druze', 'Palestinian', 'Bedouin', -- West Asian (HGDP)
- 'Papuan', 'Melanesian', -- Oceanian (HGDP)
- 'Yakut', -- Central Asian (HGDP)
- 'Maya', 'Pima', 'Karitiana' -- Native American (HGDP)
-])
-ON CONFLICT (population_name) DO NOTHING;
-
--- !Downs
-
-DROP TABLE IF EXISTS super_population_summary;
-DROP TABLE IF EXISTS population_component;
-DROP TABLE IF EXISTS population_breakdown;
diff --git a/conf/evolutions/default/39.sql b/conf/evolutions/default/39.sql
deleted file mode 100644
index a572a056..00000000
--- a/conf/evolutions/default/39.sql
+++ /dev/null
@@ -1,38 +0,0 @@
--- !Ups
-
--- Genotype data table for chip/array-based genetic data
--- Stores metadata about SNP array files and their quality metrics
-CREATE TABLE genotype_data (
- id SERIAL PRIMARY KEY,
- at_uri VARCHAR UNIQUE,
- at_cid VARCHAR,
- sample_guid UUID NOT NULL,
- test_type_id INT REFERENCES test_type_definition(id),
- provider VARCHAR, -- 23andMe, AncestryDNA, FTDNA, LivingDNA, MyHeritage
- chip_version VARCHAR,
- build_version VARCHAR, -- GRCh37, GRCh38
- source_file_hash VARCHAR, -- SHA-256 for deduplication
- -- Metrics consolidated into JSONB to reduce column count
- -- Contains: totalMarkersCalled, totalMarkersPossible, callRate, noCallRate,
- -- yMarkersCalled, yMarkersTotal, mtMarkersCalled, mtMarkersTotal,
- -- autosomalMarkersCalled, hetRate, testDate, processedAt,
- -- derivedYHaplogroup, derivedMtHaplogroup, files
- metrics JSONB NOT NULL DEFAULT '{}',
- population_breakdown_id INT REFERENCES population_breakdown(id),
- deleted BOOLEAN DEFAULT FALSE,
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW()
-);
-
-CREATE INDEX idx_genotype_sample_guid ON genotype_data(sample_guid);
-CREATE INDEX idx_genotype_test_type ON genotype_data(test_type_id);
-CREATE INDEX idx_genotype_at_uri ON genotype_data(at_uri) WHERE at_uri IS NOT NULL;
-CREATE INDEX idx_genotype_provider ON genotype_data(provider);
-
-COMMENT ON TABLE genotype_data IS 'SNP array/chip genotype data with quality metrics and derived haplogroups';
-COMMENT ON COLUMN genotype_data.source_file_hash IS 'SHA-256 hash for file deduplication';
-COMMENT ON COLUMN genotype_data.metrics IS 'JSONB containing: totalMarkersCalled, totalMarkersPossible, callRate, noCallRate, yMarkersCalled, yMarkersTotal, mtMarkersCalled, mtMarkersTotal, autosomalMarkersCalled, hetRate, testDate, processedAt, derivedYHaplogroup, derivedMtHaplogroup, files';
-
--- !Downs
-
-DROP TABLE IF EXISTS genotype_data;
diff --git a/conf/evolutions/default/4.sql b/conf/evolutions/default/4.sql
deleted file mode 100644
index 333eeb15..00000000
--- a/conf/evolutions/default/4.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-# --- !Ups
---- Add revision tracking table
-CREATE TABLE relationship_revision_metadata
-(
- haplogroup_relationship_id INT NOT NULL,
- revision_id INT NOT NULL,
- author VARCHAR(255) NOT NULL,
- timestamp TIMESTAMP NOT NULL,
- comment TEXT NOT NULL,
- change_type VARCHAR(50) NOT NULL,
- previous_revision_id INT,
- PRIMARY KEY (haplogroup_relationship_id, revision_id),
- FOREIGN KEY (haplogroup_relationship_id)
- REFERENCES haplogroup_relationship (haplogroup_relationship_id)
- ON DELETE CASCADE
-);
-
--- Indexes for common queries
-CREATE INDEX idx_revision_metadata_author ON relationship_revision_metadata (author);
-CREATE INDEX idx_revision_metadata_timestamp ON relationship_revision_metadata (timestamp);
-CREATE INDEX idx_revision_metadata_change_type ON relationship_revision_metadata (change_type);
-
-# --- !Downs
-
-DROP TABLE relationship_revision_metadata;
\ No newline at end of file
diff --git a/conf/evolutions/default/40.sql b/conf/evolutions/default/40.sql
deleted file mode 100644
index 3c350565..00000000
--- a/conf/evolutions/default/40.sql
+++ /dev/null
@@ -1,71 +0,0 @@
--- !Ups
-
--- Haplogroup reconciliation table for multi-run/multi-biosample consensus
--- Stored at specimen_donor level since a donor may have multiple biosamples
--- from different testing companies or labs that need reconciliation
-
-CREATE TYPE dna_type AS ENUM ('Y_DNA', 'MT_DNA');
-
-CREATE TABLE haplogroup_reconciliation (
- id SERIAL PRIMARY KEY,
- at_uri VARCHAR UNIQUE,
- at_cid VARCHAR,
- specimen_donor_id INT NOT NULL REFERENCES specimen_donor(id),
- dna_type dna_type NOT NULL,
-
- -- Reconciliation status metrics consolidated into JSONB
- -- Contains: compatibilityLevel, consensusHaplogroup, statusConfidence,
- -- branchCompatibilityScore, snpConcordance, runCount, warnings
- status JSONB NOT NULL DEFAULT '{}',
-
- -- Run calls stored as JSONB array of RunHaplogroupCall objects
- -- Each call: { sourceRef, haplogroup, confidence, callMethod, score,
- -- supportingSnps, conflictingSnps, noCalls, technology,
- -- meanCoverage, treeVersion, strPrediction }
- run_calls JSONB NOT NULL,
-
- -- Optional conflict/heteroplasmy data
- -- Each conflict: { position, snpName, contigAccession, calls[], resolution, resolvedValue }
- snp_conflicts JSONB,
-
- -- Each observation: { position, majorAllele, minorAllele, majorAlleleFrequency,
- -- depth, isDefiningSnp, affectedHaplogroup }
- heteroplasmy_observations JSONB,
-
- -- Identity verification metrics
- -- { kinshipCoefficient, fingerprintSnpConcordance, yStrDistance,
- -- verificationStatus, verificationMethod }
- identity_verification JSONB,
-
- -- Manual override if user corrected the consensus
- -- { overriddenHaplogroup, reason, overriddenAt, overriddenBy }
- manual_override JSONB,
-
- -- Audit log of reconciliation changes
- -- Each entry: { timestamp, action, previousConsensus, newConsensus, runRef, notes }
- audit_log JSONB,
-
- last_reconciliation_at TIMESTAMP,
- deleted BOOLEAN DEFAULT FALSE,
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW()
-);
-
--- Partial unique index: at most one non-deleted reconciliation per donor per DNA type
-CREATE UNIQUE INDEX idx_reconciliation_donor_dna_type
- ON haplogroup_reconciliation(specimen_donor_id, dna_type)
- WHERE deleted = FALSE;
-
-CREATE INDEX idx_reconciliation_specimen_donor ON haplogroup_reconciliation(specimen_donor_id);
-CREATE INDEX idx_reconciliation_at_uri ON haplogroup_reconciliation(at_uri) WHERE at_uri IS NOT NULL;
--- Index on JSONB field for consensus haplogroup queries
-CREATE INDEX idx_reconciliation_consensus ON haplogroup_reconciliation((status->>'consensusHaplogroup'));
-
-COMMENT ON TABLE haplogroup_reconciliation IS 'Multi-run haplogroup reconciliation at specimen donor level';
-COMMENT ON COLUMN haplogroup_reconciliation.run_calls IS 'Array of RunHaplogroupCall objects from each source (runs, alignments, STR profiles)';
-COMMENT ON COLUMN haplogroup_reconciliation.status IS 'JSONB containing: compatibilityLevel, consensusHaplogroup, statusConfidence, branchCompatibilityScore (LCA_depth / max(depth_A, depth_B) - 1.0 = fully compatible), snpConcordance, runCount, warnings';
-
--- !Downs
-
-DROP TABLE IF EXISTS haplogroup_reconciliation;
-DROP TYPE IF EXISTS dna_type;
diff --git a/conf/evolutions/default/41.sql b/conf/evolutions/default/41.sql
deleted file mode 100644
index 2578c3de..00000000
--- a/conf/evolutions/default/41.sql
+++ /dev/null
@@ -1,56 +0,0 @@
--- !Ups
-
--- Move user_pds_info from public schema to auth schema
--- This table stores where each user's AT Protocol identity lives (their home PDS)
-
--- Step 1: Create the new table in auth schema
-CREATE TABLE auth.user_pds_info
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- user_id UUID UNIQUE NOT NULL,
- pds_url VARCHAR(512) NOT NULL, -- Increased length for longer PDS URLs
- did VARCHAR(255) UNIQUE NOT NULL,
- handle VARCHAR(255), -- Cache the resolved handle
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_auth_user_pds_info_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE
-);
-
--- Step 2: Migrate existing data
-INSERT INTO auth.user_pds_info (id, user_id, pds_url, did, created_at, updated_at)
-SELECT id, user_id, pds_url, did, created_at, updated_at
-FROM public.user_pds_info;
-
--- Step 3: Drop the old table
-DROP TABLE public.user_pds_info;
-
--- Step 4: Add indexes for common lookups
-CREATE INDEX idx_auth_user_pds_info_did ON auth.user_pds_info(did);
-CREATE INDEX idx_auth_user_pds_info_handle ON auth.user_pds_info(handle) WHERE handle IS NOT NULL;
-
-COMMENT ON TABLE auth.user_pds_info IS 'Stores the home PDS URL for each user - where their AT Protocol identity lives';
-COMMENT ON COLUMN auth.user_pds_info.pds_url IS 'The resolved PDS endpoint URL (e.g., https://bsky.social or https://pds.decodingus.com)';
-COMMENT ON COLUMN auth.user_pds_info.handle IS 'Cached handle for quick lookups without re-resolution';
-
--- !Downs
-
--- Recreate the table in public schema
-CREATE TABLE public.user_pds_info
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- user_id UUID UNIQUE NOT NULL,
- pds_url VARCHAR(255) NOT NULL,
- did VARCHAR(255) UNIQUE NOT NULL,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_user_pds_info_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE,
- CONSTRAINT fk_user_pds_info_did FOREIGN KEY (did) REFERENCES public.users (did) ON DELETE CASCADE
-);
-
--- Migrate data back
-INSERT INTO public.user_pds_info (id, user_id, pds_url, did, created_at, updated_at)
-SELECT id, user_id, pds_url, did, created_at, updated_at
-FROM auth.user_pds_info;
-
--- Drop the auth table
-DROP TABLE auth.user_pds_info;
diff --git a/conf/evolutions/default/42.sql b/conf/evolutions/default/42.sql
deleted file mode 100644
index 16140219..00000000
--- a/conf/evolutions/default/42.sql
+++ /dev/null
@@ -1,29 +0,0 @@
--- !Ups
-
--- Rename email_encrypted back to email and ensure CITEXT for case-insensitive uniqueness
--- Drop any existing constraints on the column (both possible names)
-ALTER TABLE public.users DROP CONSTRAINT IF EXISTS users_email_encrypted_key;
-ALTER TABLE public.users DROP CONSTRAINT IF EXISTS users_email_key;
-
--- Rename the column
-ALTER TABLE public.users RENAME COLUMN email_encrypted TO email;
-
--- Change type to CITEXT for case-insensitive comparison (if not already)
-ALTER TABLE public.users ALTER COLUMN email TYPE CITEXT USING email::CITEXT;
-
--- Add unique constraint (case-insensitive via CITEXT)
-ALTER TABLE public.users ADD CONSTRAINT users_email_key UNIQUE (email);
-
--- !Downs
-
--- Drop the unique constraint
-ALTER TABLE public.users DROP CONSTRAINT IF EXISTS users_email_key;
-
--- Change type back to VARCHAR (stored encrypted values were text)
-ALTER TABLE public.users ALTER COLUMN email TYPE VARCHAR(255);
-
--- Rename back to email_encrypted
-ALTER TABLE public.users RENAME COLUMN email TO email_encrypted;
-
--- Re-add the original constraint
-ALTER TABLE public.users ADD CONSTRAINT users_email_encrypted_key UNIQUE (email_encrypted);
diff --git a/conf/evolutions/default/43.sql b/conf/evolutions/default/43.sql
deleted file mode 100644
index 9d7b1866..00000000
--- a/conf/evolutions/default/43.sql
+++ /dev/null
@@ -1,31 +0,0 @@
--- !Ups
-
--- Cookie consent tracking for GDPR compliance
--- Tracks when users accept the cookie policy
-
-CREATE TABLE auth.cookie_consents (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- user_id UUID REFERENCES public.users(id) ON DELETE CASCADE,
- session_id VARCHAR(255), -- For anonymous users before login
- ip_address_hash VARCHAR(64), -- Hashed for privacy, used for anonymous consent
- consent_given BOOLEAN NOT NULL DEFAULT FALSE,
- consent_timestamp TIMESTAMP NOT NULL DEFAULT NOW(),
- policy_version VARCHAR(20) NOT NULL DEFAULT '1.0', -- Track which version they accepted
- user_agent TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
-
- -- Either user_id or session_id must be present
- CONSTRAINT chk_consent_identity CHECK (user_id IS NOT NULL OR session_id IS NOT NULL)
-);
-
--- Index for quick lookups
-CREATE INDEX idx_cookie_consents_user_id ON auth.cookie_consents(user_id) WHERE user_id IS NOT NULL;
-CREATE INDEX idx_cookie_consents_session_id ON auth.cookie_consents(session_id) WHERE session_id IS NOT NULL;
-
-COMMENT ON TABLE auth.cookie_consents IS 'Tracks user acceptance of cookie policy for GDPR compliance';
-COMMENT ON COLUMN auth.cookie_consents.policy_version IS 'Version of the cookie policy the user accepted';
-COMMENT ON COLUMN auth.cookie_consents.ip_address_hash IS 'SHA-256 hash of IP address for anonymous consent tracking';
-
--- !Downs
-
-DROP TABLE auth.cookie_consents;
diff --git a/conf/evolutions/default/44.sql b/conf/evolutions/default/44.sql
deleted file mode 100644
index 4f26a92a..00000000
--- a/conf/evolutions/default/44.sql
+++ /dev/null
@@ -1,60 +0,0 @@
--- !Ups
-
--- Support schema for contact/messaging system
-CREATE SCHEMA support;
-
--- Contact messages from users (both authenticated and anonymous)
-CREATE TABLE support.contact_messages (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-
- -- For authenticated users
- user_id UUID REFERENCES public.users(id) ON DELETE SET NULL,
-
- -- For anonymous users (captured from form)
- sender_name VARCHAR(255),
- sender_email VARCHAR(255),
-
- -- Message content
- subject VARCHAR(500) NOT NULL,
- message TEXT NOT NULL,
-
- -- Status tracking
- status VARCHAR(50) NOT NULL DEFAULT 'new', -- new, read, replied, closed
-
- -- Metadata
- ip_address_hash VARCHAR(64),
- user_agent TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Admin replies to contact messages
-CREATE TABLE support.message_replies (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- message_id UUID NOT NULL REFERENCES support.contact_messages(id) ON DELETE CASCADE,
- admin_user_id UUID NOT NULL REFERENCES public.users(id) ON DELETE RESTRICT,
- reply_text TEXT NOT NULL,
-
- -- For anonymous users, track if email was sent
- email_sent BOOLEAN NOT NULL DEFAULT FALSE,
- email_sent_at TIMESTAMP,
-
- created_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Indexes
-CREATE INDEX idx_contact_messages_user_id ON support.contact_messages(user_id) WHERE user_id IS NOT NULL;
-CREATE INDEX idx_contact_messages_status ON support.contact_messages(status);
-CREATE INDEX idx_contact_messages_created_at ON support.contact_messages(created_at DESC);
-CREATE INDEX idx_message_replies_message_id ON support.message_replies(message_id);
-
-COMMENT ON SCHEMA support IS 'Support ticket and contact message system';
-COMMENT ON TABLE support.contact_messages IS 'Contact form submissions from authenticated and anonymous users';
-COMMENT ON TABLE support.message_replies IS 'Admin replies to contact messages';
-COMMENT ON COLUMN support.contact_messages.status IS 'Message status: new, read, replied, closed';
-
--- !Downs
-
-DROP TABLE support.message_replies;
-DROP TABLE support.contact_messages;
-DROP SCHEMA support;
diff --git a/conf/evolutions/default/45.sql b/conf/evolutions/default/45.sql
deleted file mode 100644
index 9d1b9986..00000000
--- a/conf/evolutions/default/45.sql
+++ /dev/null
@@ -1,11 +0,0 @@
--- !Ups
-
--- Track when authenticated users last viewed their message history
--- This allows us to show a badge for new replies since their last visit
-ALTER TABLE support.contact_messages ADD COLUMN user_last_viewed_at TIMESTAMP;
-
-COMMENT ON COLUMN support.contact_messages.user_last_viewed_at IS 'Timestamp when authenticated user last viewed this message thread';
-
--- !Downs
-
-ALTER TABLE support.contact_messages DROP COLUMN user_last_viewed_at;
diff --git a/conf/evolutions/default/46.sql b/conf/evolutions/default/46.sql
deleted file mode 100644
index 17aa03ff..00000000
--- a/conf/evolutions/default/46.sql
+++ /dev/null
@@ -1,72 +0,0 @@
--- !Ups
-
--- Add TreeCurator role
-INSERT INTO auth.roles (id, name, description, created_at, updated_at)
-VALUES (gen_random_uuid(), 'TreeCurator', 'Curator access for haplogroups and variants', NOW(), NOW())
-ON CONFLICT (name) DO NOTHING;
-
--- Create curator permissions
-INSERT INTO auth.permissions (id, name, description, created_at, updated_at) VALUES
- (gen_random_uuid(), 'haplogroup.view', 'View haplogroup details', NOW(), NOW()),
- (gen_random_uuid(), 'haplogroup.create', 'Create new haplogroups', NOW(), NOW()),
- (gen_random_uuid(), 'haplogroup.update', 'Update existing haplogroups', NOW(), NOW()),
- (gen_random_uuid(), 'haplogroup.delete', 'Delete haplogroups', NOW(), NOW()),
- (gen_random_uuid(), 'variant.view', 'View variant details', NOW(), NOW()),
- (gen_random_uuid(), 'variant.create', 'Create new variants', NOW(), NOW()),
- (gen_random_uuid(), 'variant.update', 'Update existing variants', NOW(), NOW()),
- (gen_random_uuid(), 'variant.delete', 'Delete variants', NOW(), NOW()),
- (gen_random_uuid(), 'audit.view', 'View audit history', NOW(), NOW())
-ON CONFLICT (name) DO NOTHING;
-
--- Grant all curator permissions to TreeCurator role
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id FROM auth.roles r, auth.permissions p
-WHERE r.name = 'TreeCurator'
- AND p.name IN ('haplogroup.view', 'haplogroup.create', 'haplogroup.update', 'haplogroup.delete',
- 'variant.view', 'variant.create', 'variant.update', 'variant.delete', 'audit.view')
-ON CONFLICT DO NOTHING;
-
--- Grant all curator permissions to Admin role
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id FROM auth.roles r, auth.permissions p
-WHERE r.name = 'Admin'
- AND p.name IN ('haplogroup.view', 'haplogroup.create', 'haplogroup.update', 'haplogroup.delete',
- 'variant.view', 'variant.create', 'variant.update', 'variant.delete', 'audit.view')
-ON CONFLICT DO NOTHING;
-
--- Create curator schema
-CREATE SCHEMA IF NOT EXISTS curator;
-
--- Create audit_log table; audit rows outlive the acting user (user_id is cleared on user deletion)
-CREATE TABLE curator.audit_log (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- user_id UUID, -- nullable on purpose: ON DELETE SET NULL below cannot succeed on a NOT NULL column
- entity_type VARCHAR(50) NOT NULL, -- together with entity_id identifies the audited row
- entity_id INT NOT NULL,
- action VARCHAR(20) NOT NULL,
- old_value JSONB, -- NOTE(review): presumably the entity state before the action; confirm against the writer
- new_value JSONB, -- NOTE(review): presumably the entity state after the action; confirm against the writer
- comment TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_audit_log_user_id FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE SET NULL
-);
-
-CREATE INDEX idx_audit_log_entity ON curator.audit_log(entity_type, entity_id);
-CREATE INDEX idx_audit_log_user ON curator.audit_log(user_id);
-CREATE INDEX idx_audit_log_created_at ON curator.audit_log(created_at DESC);
-
-COMMENT ON TABLE curator.audit_log IS 'Audit trail for all curator actions on haplogroups and variants';
-
--- !Downs
-
-DROP TABLE IF EXISTS curator.audit_log;
-DROP SCHEMA IF EXISTS curator;
-
-DELETE FROM auth.role_permissions
-WHERE permission_id IN (SELECT id FROM auth.permissions WHERE name LIKE 'haplogroup.%' OR name LIKE 'variant.%' OR name = 'audit.view');
-
-DELETE FROM auth.permissions
-WHERE name IN ('haplogroup.view', 'haplogroup.create', 'haplogroup.update', 'haplogroup.delete',
- 'variant.view', 'variant.create', 'variant.update', 'variant.delete', 'audit.view');
-
-DELETE FROM auth.roles WHERE name = 'TreeCurator';
diff --git a/conf/evolutions/default/47.sql b/conf/evolutions/default/47.sql
deleted file mode 100644
index 4acc1e67..00000000
--- a/conf/evolutions/default/47.sql
+++ /dev/null
@@ -1,36 +0,0 @@
-# --- !Ups
-
--- Variant Alias Table
--- Stores alternative names for variants from different sources (YBrowse, ISOGG, YFull, publications, etc.)
--- A single variant may be known by multiple names across different research groups.
-
-CREATE TABLE variant_alias (
- id SERIAL PRIMARY KEY,
- variant_id INT NOT NULL REFERENCES variant(variant_id) ON DELETE CASCADE,
- alias_type VARCHAR(50) NOT NULL, -- 'common_name', 'rs_id', 'isogg', 'yfull', 'ftdna', etc.
- alias_value VARCHAR(255) NOT NULL,
- source VARCHAR(255), -- Origin: 'ybrowse', 'isogg', 'curator', 'yfull', etc.
- is_primary BOOLEAN DEFAULT FALSE, -- Primary alias for this type (for display preference)
- created_at TIMESTAMP DEFAULT NOW() NOT NULL,
- UNIQUE(variant_id, alias_type, alias_value)
-);
-
-CREATE INDEX idx_variant_alias_variant ON variant_alias(variant_id);
-CREATE INDEX idx_variant_alias_value ON variant_alias(alias_value);
-CREATE INDEX idx_variant_alias_type_value ON variant_alias(alias_type, alias_value);
-
--- Migrate existing names to alias table
--- This preserves the current common_name and rs_id as aliases
-INSERT INTO variant_alias (variant_id, alias_type, alias_value, source, is_primary)
-SELECT variant_id, 'common_name', common_name, 'migration', TRUE
-FROM variant
-WHERE common_name IS NOT NULL;
-
-INSERT INTO variant_alias (variant_id, alias_type, alias_value, source, is_primary)
-SELECT variant_id, 'rs_id', rs_id, 'migration', TRUE
-FROM variant
-WHERE rs_id IS NOT NULL;
-
-# --- !Downs
-
-DROP TABLE IF EXISTS variant_alias;
diff --git a/conf/evolutions/default/48.sql b/conf/evolutions/default/48.sql
deleted file mode 100644
index f38a4dad..00000000
--- a/conf/evolutions/default/48.sql
+++ /dev/null
@@ -1,32 +0,0 @@
--- # --- !Ups
-
--- Add branch age estimate columns to haplogroup table
--- Dates stored as years before present (YBP) with optional confidence intervals
-
-ALTER TABLE tree.haplogroup
- ADD COLUMN formed_ybp INTEGER,
- ADD COLUMN formed_ybp_lower INTEGER,
- ADD COLUMN formed_ybp_upper INTEGER,
- ADD COLUMN tmrca_ybp INTEGER,
- ADD COLUMN tmrca_ybp_lower INTEGER,
- ADD COLUMN tmrca_ybp_upper INTEGER,
- ADD COLUMN age_estimate_source VARCHAR(100);
-
-COMMENT ON COLUMN tree.haplogroup.formed_ybp IS 'Estimated years before present when branch formed (mutation occurred)';
-COMMENT ON COLUMN tree.haplogroup.formed_ybp_lower IS 'Lower bound of 95% confidence interval for formed date';
-COMMENT ON COLUMN tree.haplogroup.formed_ybp_upper IS 'Upper bound of 95% confidence interval for formed date';
-COMMENT ON COLUMN tree.haplogroup.tmrca_ybp IS 'Estimated years before present for Time to Most Recent Common Ancestor';
-COMMENT ON COLUMN tree.haplogroup.tmrca_ybp_lower IS 'Lower bound of 95% confidence interval for TMRCA';
-COMMENT ON COLUMN tree.haplogroup.tmrca_ybp_upper IS 'Upper bound of 95% confidence interval for TMRCA';
-COMMENT ON COLUMN tree.haplogroup.age_estimate_source IS 'Source of age estimates (e.g., YFull, internal calculation)';
-
--- # --- !Downs
-
-ALTER TABLE tree.haplogroup
- DROP COLUMN IF EXISTS formed_ybp,
- DROP COLUMN IF EXISTS formed_ybp_lower,
- DROP COLUMN IF EXISTS formed_ybp_upper,
- DROP COLUMN IF EXISTS tmrca_ybp,
- DROP COLUMN IF EXISTS tmrca_ybp_lower,
- DROP COLUMN IF EXISTS tmrca_ybp_upper,
- DROP COLUMN IF EXISTS age_estimate_source;
diff --git a/conf/evolutions/default/49.sql b/conf/evolutions/default/49.sql
deleted file mode 100644
index 05cd3b47..00000000
--- a/conf/evolutions/default/49.sql
+++ /dev/null
@@ -1,16 +0,0 @@
--- !Ups
-
--- Simplify reference genome naming in genbank_contig table
--- - Remove patch versions: GRCh37.p13 -> GRCh37, GRCh38.p14 -> GRCh38
--- - Use UCSC convention for T2T-CHM13: T2T-CHM13v2.0 -> hs1
-
-UPDATE genbank_contig SET reference_genome = 'GRCh37' WHERE reference_genome = 'GRCh37.p13';
-UPDATE genbank_contig SET reference_genome = 'GRCh38' WHERE reference_genome = 'GRCh38.p14';
-UPDATE genbank_contig SET reference_genome = 'hs1' WHERE reference_genome = 'T2T-CHM13v2.0';
-
--- !Downs
-
--- Restore the patch-level / T2T reference genome names (applies to every row carrying the simplified name)
-UPDATE genbank_contig SET reference_genome = 'GRCh37.p13' WHERE reference_genome = 'GRCh37';
-UPDATE genbank_contig SET reference_genome = 'GRCh38.p14' WHERE reference_genome = 'GRCh38';
-UPDATE genbank_contig SET reference_genome = 'T2T-CHM13v2.0' WHERE reference_genome = 'hs1';
diff --git a/conf/evolutions/default/5.sql b/conf/evolutions/default/5.sql
deleted file mode 100644
index c3b2a4a3..00000000
--- a/conf/evolutions/default/5.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-# --- !Ups
---- Add variant revision tracking table
-CREATE TABLE haplogroup_variant_metadata (
- haplogroup_variant_id INT NOT NULL,
- revision_id INT NOT NULL,
- author VARCHAR(255) NOT NULL,
- timestamp TIMESTAMP NOT NULL,
- comment TEXT NOT NULL,
- change_type VARCHAR(50) NOT NULL,
- previous_revision_id INT,
- PRIMARY KEY (haplogroup_variant_id, revision_id),
- FOREIGN KEY (haplogroup_variant_id)
- REFERENCES haplogroup_variant (haplogroup_variant_id)
- ON DELETE CASCADE
-);
-
-# --- !Downs
-DROP TABLE haplogroup_variant_metadata;
\ No newline at end of file
diff --git a/conf/evolutions/default/50.sql b/conf/evolutions/default/50.sql
deleted file mode 100644
index abdb4d8f..00000000
--- a/conf/evolutions/default/50.sql
+++ /dev/null
@@ -1,69 +0,0 @@
-# --- !Ups
-
--- Genome region structural annotations
--- References existing genbank_contig table for chromosome data
-
--- Version tracking for ETag generation
-CREATE TABLE genome_region_version (
- id SERIAL PRIMARY KEY,
- reference_genome VARCHAR(20) NOT NULL UNIQUE, -- GRCh37, GRCh38, hs1
- data_version VARCHAR(20) NOT NULL, -- e.g., "2024.12.1"
- updated_at TIMESTAMP DEFAULT NOW()
-);
-
--- Structural regions (centromere, telomere, PAR, XTR, ampliconic, etc.)
-CREATE TABLE genome_region (
- id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id),
- region_type VARCHAR(30) NOT NULL, -- Centromere, Telomere_P, Telomere_Q, PAR1, PAR2, XTR, Ampliconic, Palindrome, Heterochromatin, XDegenerate
- name VARCHAR(50), -- For named regions (P1-P8 palindromes)
- start_pos BIGINT NOT NULL,
- end_pos BIGINT NOT NULL,
- modifier DECIMAL(3,2), -- Quality modifier (0.1-1.0)
- UNIQUE(genbank_contig_id, region_type, name, start_pos)
-);
-
--- Cytoband annotations for ideogram display
-CREATE TABLE cytoband (
- id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id),
- name VARCHAR(20) NOT NULL, -- p11.32, q11.21, etc.
- start_pos BIGINT NOT NULL,
- end_pos BIGINT NOT NULL,
- stain VARCHAR(10) NOT NULL, -- gneg, gpos25, gpos50, gpos75, gpos100, acen, gvar, stalk
- UNIQUE(genbank_contig_id, name)
-);
-
--- STR marker positions
-CREATE TABLE str_marker (
- id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id),
- name VARCHAR(30) NOT NULL, -- DYS389I, DYS456, etc.
- start_pos BIGINT NOT NULL,
- end_pos BIGINT NOT NULL,
- period INT NOT NULL, -- Repeat unit length in bp
- verified BOOLEAN DEFAULT false,
- note TEXT,
- UNIQUE(genbank_contig_id, name)
-);
-
--- Insert initial version records
-INSERT INTO genome_region_version (reference_genome, data_version) VALUES
- ('GRCh37', '2024.12.1'),
- ('GRCh38', '2024.12.1'),
- ('hs1', '2024.12.1');
-
-CREATE INDEX idx_genome_region_contig ON genome_region(genbank_contig_id);
-CREATE INDEX idx_cytoband_contig ON cytoband(genbank_contig_id);
-CREATE INDEX idx_str_marker_contig ON str_marker(genbank_contig_id);
-
-# --- !Downs
-
-DROP INDEX IF EXISTS idx_str_marker_contig;
-DROP INDEX IF EXISTS idx_cytoband_contig;
-DROP INDEX IF EXISTS idx_genome_region_contig;
-
-DROP TABLE IF EXISTS str_marker;
-DROP TABLE IF EXISTS cytoband;
-DROP TABLE IF EXISTS genome_region;
-DROP TABLE IF EXISTS genome_region_version;
diff --git a/conf/evolutions/default/51.sql b/conf/evolutions/default/51.sql
deleted file mode 100644
index 200c24a1..00000000
--- a/conf/evolutions/default/51.sql
+++ /dev/null
@@ -1,43 +0,0 @@
--- !Ups
-
--- Genome regions curator permissions: view/create/update/delete for genome regions, cytobands and STR markers. Names that already exist are skipped.
-INSERT INTO auth.permissions (id, name, description, created_at, updated_at) VALUES
- (gen_random_uuid(), 'genome_region.view', 'View genome region details', NOW(), NOW()),
- (gen_random_uuid(), 'genome_region.create', 'Create genome regions', NOW(), NOW()),
- (gen_random_uuid(), 'genome_region.update', 'Update genome regions', NOW(), NOW()),
- (gen_random_uuid(), 'genome_region.delete', 'Delete genome regions', NOW(), NOW()),
- (gen_random_uuid(), 'cytoband.view', 'View cytoband details', NOW(), NOW()),
- (gen_random_uuid(), 'cytoband.create', 'Create cytobands', NOW(), NOW()),
- (gen_random_uuid(), 'cytoband.update', 'Update cytobands', NOW(), NOW()),
- (gen_random_uuid(), 'cytoband.delete', 'Delete cytobands', NOW(), NOW()),
- (gen_random_uuid(), 'str_marker.view', 'View STR marker details', NOW(), NOW()),
- (gen_random_uuid(), 'str_marker.create', 'Create STR markers', NOW(), NOW()),
- (gen_random_uuid(), 'str_marker.update', 'Update STR markers', NOW(), NOW()),
- (gen_random_uuid(), 'str_marker.delete', 'Delete STR markers', NOW(), NOW())
-ON CONFLICT (name) DO NOTHING;
-
--- Grant to Curator role: the join pairs the role row with each listed permission; inserts nothing if the role is absent, and existing grants are skipped
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id FROM auth.roles r, auth.permissions p
-WHERE r.name = 'Curator'
- AND p.name IN ('genome_region.view', 'genome_region.create', 'genome_region.update', 'genome_region.delete',
- 'cytoband.view', 'cytoband.create', 'cytoband.update', 'cytoband.delete',
- 'str_marker.view', 'str_marker.create', 'str_marker.update', 'str_marker.delete')
-ON CONFLICT DO NOTHING;
-
--- Grant to Admin role (same twelve permissions, same skip-if-present behaviour)
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id FROM auth.roles r, auth.permissions p
-WHERE r.name = 'Admin'
- AND p.name IN ('genome_region.view', 'genome_region.create', 'genome_region.update', 'genome_region.delete',
- 'cytoband.view', 'cytoband.create', 'cytoband.update', 'cytoband.delete',
- 'str_marker.view', 'str_marker.create', 'str_marker.update', 'str_marker.delete')
-ON CONFLICT DO NOTHING;
-
--- !Downs
--- Remove the role grants, then the permissions. NOTE(review): in LIKE, '_' matches any single character, so these patterns are slightly broader than the literal prefixes.
-DELETE FROM auth.role_permissions
-WHERE permission_id IN (SELECT id FROM auth.permissions WHERE name LIKE 'genome_region.%' OR name LIKE 'cytoband.%' OR name LIKE 'str_marker.%');
-
-DELETE FROM auth.permissions
-WHERE name LIKE 'genome_region.%' OR name LIKE 'cytoband.%' OR name LIKE 'str_marker.%';
diff --git a/conf/evolutions/default/52.sql b/conf/evolutions/default/52.sql
deleted file mode 100644
index 44e581f9..00000000
--- a/conf/evolutions/default/52.sql
+++ /dev/null
@@ -1,15 +0,0 @@
-# --- !Ups
-
--- Add provenance JSONB column to haplogroup table for multi-source attribution tracking (nullable, no default)
-ALTER TABLE tree.haplogroup ADD COLUMN provenance JSONB;
-
--- Add GIN index (default jsonb operator class) for efficient querying by provenance fields
-CREATE INDEX idx_haplogroup_provenance ON tree.haplogroup USING GIN (provenance);
-
--- Add comment for documentation (describes the expected JSON structure; nothing enforces it)
-COMMENT ON COLUMN tree.haplogroup.provenance IS 'JSONB tracking node and variant provenance from multiple sources. Structure: {primaryCredit, nodeProvenance[], variantProvenance{}, lastMergedAt, lastMergedFrom}';
-
-# --- !Downs
--- An index lives in its table's schema, hence the tree-qualified name; both drops tolerate a missing object.
-DROP INDEX IF EXISTS tree.idx_haplogroup_provenance;
-ALTER TABLE tree.haplogroup DROP COLUMN IF EXISTS provenance;
diff --git a/conf/evolutions/default/53.sql b/conf/evolutions/default/53.sql
deleted file mode 100644
index d0d283f6..00000000
--- a/conf/evolutions/default/53.sql
+++ /dev/null
@@ -1,251 +0,0 @@
-# --- !Ups
-
--- ==============================================================================
--- VARIANT_V2: Consolidated variant table with JSONB coordinates and aliases
--- Replaces: variant, variant_alias tables (the old tables are not dropped by this evolution)
--- Reference: documents/proposals/variant-schema-simplification.md
--- ==============================================================================
-
--- mutation_type values (documented only; no CHECK constraint enforces them):
--- Point mutations: SNP, INDEL, MNP
--- Repeat variations: STR
--- Structural variants: DEL, DUP, INS, INV, CNV, TRANS
--- naming_status values: UNNAMED, PENDING_REVIEW, NAMED (also not enforced)
--- aliases structure: {common_names: [], rs_ids: [], sources: {ybrowse: [], isogg: [], ...}}
--- coordinates structure: {hs1: {contig, position, ref, alt}, GRCh38: {...}, ...}
-
-CREATE TABLE variant_v2 (
- variant_id SERIAL PRIMARY KEY,
- canonical_name TEXT, -- NULL for unnamed variants
- mutation_type TEXT NOT NULL DEFAULT 'SNP',
- naming_status TEXT NOT NULL DEFAULT 'UNNAMED',
- aliases JSONB DEFAULT '{}'::jsonb,
- coordinates JSONB DEFAULT '{}'::jsonb,
- defining_haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id) ON DELETE SET NULL, -- deleting the haplogroup keeps the variant and clears the link
- evidence JSONB DEFAULT '{}'::jsonb,
- primers JSONB DEFAULT '{}'::jsonb,
- notes TEXT,
- created_at TIMESTAMPTZ DEFAULT NOW(),
- updated_at TIMESTAMPTZ DEFAULT NOW() -- default applies on insert only; this evolution adds no trigger to refresh it
-);
-
-COMMENT ON TABLE variant_v2 IS 'Consolidated variant table with JSONB coordinates supporting multiple reference genomes. One row per logical variant.';
-COMMENT ON COLUMN variant_v2.canonical_name IS 'Primary variant name (e.g., M269, DYS456). NULL for unnamed/novel variants.';
-COMMENT ON COLUMN variant_v2.mutation_type IS 'Variant type: SNP, INDEL, MNP (point) | STR (repeat) | DEL, DUP, INS, INV, CNV, TRANS (structural)';
-COMMENT ON COLUMN variant_v2.aliases IS 'JSONB containing all known names: {common_names: [], rs_ids: [], sources: {source: [names]}}';
-COMMENT ON COLUMN variant_v2.coordinates IS 'Per-assembly coordinates. Structure varies by mutation_type. hs1 is primary reference.';
-COMMENT ON COLUMN variant_v2.defining_haplogroup_id IS 'Haplogroup this variant defines. Distinguishes parallel mutations (same name, different lineages).';
-
--- Unique constraint for named variants (allows parallel mutations with different haplogroups). COALESCE maps a NULL haplogroup to -1 so two rows with the same name and no haplogroup still collide.
-CREATE UNIQUE INDEX idx_variant_v2_name_haplogroup
- ON variant_v2(canonical_name, COALESCE(defining_haplogroup_id, -1))
- WHERE canonical_name IS NOT NULL;
-
--- For unnamed variants, uniqueness based on hs1 coordinates (primary reference); rows without an hs1 key are not covered by this index
-CREATE UNIQUE INDEX idx_variant_v2_unnamed_coordinates
- ON variant_v2(
- (coordinates->'hs1'->>'contig'),
- ((coordinates->'hs1'->>'position')::int),
- (coordinates->'hs1'->>'ref'),
- (coordinates->'hs1'->>'alt')
- )
- WHERE canonical_name IS NULL AND coordinates ? 'hs1';
-
--- Performance indexes (btree on scalar columns, GIN on the JSONB columns)
-CREATE INDEX idx_variant_v2_canonical ON variant_v2(canonical_name);
-CREATE INDEX idx_variant_v2_aliases ON variant_v2 USING GIN(aliases);
-CREATE INDEX idx_variant_v2_coordinates ON variant_v2 USING GIN(coordinates);
-CREATE INDEX idx_variant_v2_mutation_type ON variant_v2(mutation_type);
-CREATE INDEX idx_variant_v2_defining_haplogroup ON variant_v2(defining_haplogroup_id);
-
--- Search index for alias common_names array (jsonb_path_ops operator class, aimed at @> containment queries)
-CREATE INDEX idx_variant_v2_alias_common_names ON variant_v2
- USING GIN((aliases->'common_names') jsonb_path_ops);
-
--- ==============================================================================
--- SUPPORTING TABLES: ASR and branch mutation tracking
--- ==============================================================================
-
--- Haplogroup character states (ASR reconstructed states at tree nodes); rows are removed when either the haplogroup or the variant is deleted
-CREATE TABLE haplogroup_character_state (
- id SERIAL PRIMARY KEY,
- haplogroup_id INT NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE,
- variant_id INT NOT NULL REFERENCES variant_v2(variant_id) ON DELETE CASCADE,
-
- -- The inferred state at this node
- -- For SNPs: "ancestral" or "derived" (or the actual allele: "G", "A")
- -- For STRs: the repeat count as string (e.g., "15") or "NULL" for null alleles
- inferred_state TEXT NOT NULL,
-
- -- Confidence from ASR algorithm
- confidence DECIMAL(5,4), -- intended range 0.0000 to 1.0000 (the type itself allows up to 9.9999; no CHECK)
-
- -- For uncertain reconstructions: probability distribution over states
- state_probabilities JSONB,
- -- Example: {"13": 0.05, "14": 0.25, "15": 0.65, "16": 0.05}
-
- -- ASR metadata
- algorithm TEXT, -- "parsimony", "ml", "bayesian"
- reconstructed_at TIMESTAMPTZ DEFAULT NOW(),
-
- UNIQUE(haplogroup_id, variant_id) -- at most one reconstructed state per haplogroup/variant pair
-);
-
-CREATE INDEX idx_character_state_haplogroup ON haplogroup_character_state(haplogroup_id);
-CREATE INDEX idx_character_state_variant ON haplogroup_character_state(variant_id);
-
-COMMENT ON TABLE haplogroup_character_state IS 'ASR reconstructed character states at haplogroup nodes. Replaces haplogroup_ancestral_str concept.';
-COMMENT ON COLUMN haplogroup_character_state.inferred_state IS 'Inferred state: SNP allele, STR repeat count, SV presence, etc.';
-
--- Branch mutations (state changes along tree branches); one row per variant per parent->child branch
-CREATE TABLE branch_mutation (
- id SERIAL PRIMARY KEY,
- variant_id INT NOT NULL REFERENCES variant_v2(variant_id) ON DELETE CASCADE,
-
- -- The branch where the mutation occurred (parent -> child)
- parent_haplogroup_id INT NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE,
- child_haplogroup_id INT NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE,
-
- -- State transition
- from_state TEXT NOT NULL, -- "G" or "15"
- to_state TEXT NOT NULL, -- "A" or "16"
-
- -- For STRs: direction of change (+1 = expansion, -1 = contraction, NULL for SNPs)
- step_direction INT,
-
- -- Confidence from ASR (same DECIMAL(5,4) scale as haplogroup_character_state.confidence)
- confidence DECIMAL(5,4),
-
- UNIQUE(variant_id, parent_haplogroup_id, child_haplogroup_id) -- a variant is recorded at most once per branch
-);
-
-CREATE INDEX idx_branch_mutation_child ON branch_mutation(child_haplogroup_id);
-CREATE INDEX idx_branch_mutation_parent ON branch_mutation(parent_haplogroup_id);
-CREATE INDEX idx_branch_mutation_variant ON branch_mutation(variant_id);
-
-COMMENT ON TABLE branch_mutation IS 'State transitions along tree branches for all variant types (SNP, STR, SV).';
-
--- Biosample variant calls (observed values from samples, input to ASR); rows are removed with their biosample or variant
-CREATE TABLE biosample_variant_call (
- id SERIAL PRIMARY KEY,
- biosample_id INT NOT NULL REFERENCES biosample(id) ON DELETE CASCADE,
- variant_id INT NOT NULL REFERENCES variant_v2(variant_id) ON DELETE CASCADE,
-
- -- The observed state
- -- For SNPs: "ref", "alt", "het", or actual alleles
- -- For STRs: repeat count as string (e.g., "15") or "NULL"
- observed_state TEXT NOT NULL,
-
- -- Call quality metrics (all optional)
- quality_score INT,
- read_depth INT,
- confidence TEXT, -- "high", "medium", "low" (free text here, unlike the DECIMAL confidence in the ASR tables)
-
- -- Source attribution
- source TEXT, -- "ftdna", "yfull", "user_upload"
- created_at TIMESTAMPTZ DEFAULT NOW(),
-
- UNIQUE(biosample_id, variant_id) -- at most one call per sample per variant
-);
-
-CREATE INDEX idx_biosample_variant_call_biosample ON biosample_variant_call(biosample_id);
-CREATE INDEX idx_biosample_variant_call_variant ON biosample_variant_call(variant_id);
-
-COMMENT ON TABLE biosample_variant_call IS 'Observed variant calls from biosamples. Input data for ASR.';
-
--- STR mutation rates (reference data for ASR and age estimation); one row per marker name
-CREATE TABLE str_mutation_rate (
- id SERIAL PRIMARY KEY,
- marker_name TEXT NOT NULL UNIQUE, -- DYS456, DYS389I, etc.
- panel_names TEXT[], -- PowerPlex, YHRD, BigY, etc.
-
- -- Mutation rate per generation
- mutation_rate DECIMAL(12,10) NOT NULL,
- mutation_rate_lower DECIMAL(12,10), -- 95% CI lower
- mutation_rate_upper DECIMAL(12,10), -- 95% CI upper
-
- -- Directional bias (for stepwise mutation model); both default to 0.5
- omega_plus DECIMAL(5,4) DEFAULT 0.5, -- Probability of expansion
- omega_minus DECIMAL(5,4) DEFAULT 0.5, -- Probability of contraction
-
- -- Multi-step mutation frequencies
- multi_step_rate DECIMAL(5,4), -- omega_2 + omega_3 + ...
-
- source TEXT, -- Ballantyne 2010, Willems 2016, etc.
- created_at TIMESTAMPTZ DEFAULT NOW()
-);
--- NOTE(review): marker_name is already indexed by its UNIQUE constraint, so this extra index looks redundant.
-CREATE INDEX idx_str_mutation_rate_marker ON str_mutation_rate(marker_name);
-
-COMMENT ON TABLE str_mutation_rate IS 'Per-marker STR mutation rates for ASR and age estimation. Sources: Ballantyne 2010, Willems 2016.';
-
--- ==============================================================================
--- NOTE: Old tables (variant, variant_alias, str_marker) are NOT dropped here.
--- Data migration and cleanup should be done manually:
--- 1. Run migration script to consolidate data into variant_v2
--- 2. Update haplogroup_variant FK references
--- 3. Drop old tables after verification
--- ==============================================================================
-
-# --- !Downs
-
--- Recreate old tables (structure only - data would need restoration from backup). Ups leaves variant, variant_alias and str_marker in place, so every CREATE uses IF NOT EXISTS and is skipped when the object still exists.
-CREATE TABLE IF NOT EXISTS variant (
- variant_id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL,
- position INTEGER NOT NULL,
- reference_allele VARCHAR(255) NOT NULL,
- alternate_allele VARCHAR(255) NOT NULL,
- variant_type VARCHAR(10) NOT NULL CHECK (variant_type IN ('SNP', 'INDEL')),
- rs_id VARCHAR(255),
- common_name VARCHAR(255),
- FOREIGN KEY (genbank_contig_id) REFERENCES genbank_contig(genbank_contig_id) ON DELETE CASCADE,
- UNIQUE (genbank_contig_id, position, reference_allele, alternate_allele)
-);
-
-CREATE INDEX IF NOT EXISTS idx_variant_common_name ON variant(common_name);
-CREATE INDEX IF NOT EXISTS idx_variant_rs_id ON variant(rs_id);
-CREATE INDEX IF NOT EXISTS idx_variant_position ON variant(genbank_contig_id, position);
-
-CREATE TABLE IF NOT EXISTS variant_alias (
- id SERIAL PRIMARY KEY,
- variant_id INT NOT NULL REFERENCES variant(variant_id) ON DELETE CASCADE,
- alias_type VARCHAR(50) NOT NULL,
- alias_value VARCHAR(255) NOT NULL,
- source VARCHAR(255),
- is_primary BOOLEAN DEFAULT FALSE,
- created_at TIMESTAMP DEFAULT NOW() NOT NULL,
- UNIQUE(variant_id, alias_type, alias_value)
-);
-
-CREATE INDEX IF NOT EXISTS idx_variant_alias_variant ON variant_alias(variant_id);
-CREATE INDEX IF NOT EXISTS idx_variant_alias_value ON variant_alias(alias_value);
-CREATE INDEX IF NOT EXISTS idx_variant_alias_type_value ON variant_alias(alias_type, alias_value);
-
-CREATE TABLE IF NOT EXISTS str_marker (
- id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id) ON DELETE CASCADE,
- name VARCHAR(50) NOT NULL,
- start_pos BIGINT NOT NULL,
- end_pos BIGINT NOT NULL,
- period INT NOT NULL,
- verified BOOLEAN DEFAULT FALSE,
- note TEXT,
- UNIQUE(genbank_contig_id, name)
-);
-
-CREATE INDEX IF NOT EXISTS idx_str_marker_contig ON str_marker(genbank_contig_id);
-
--- Drop new tables (the supporting tables first; variant_v2 itself goes last, after the FK below is repointed)
-DROP TABLE IF EXISTS str_mutation_rate CASCADE;
-DROP TABLE IF EXISTS biosample_variant_call CASCADE;
-DROP TABLE IF EXISTS branch_mutation CASCADE;
-DROP TABLE IF EXISTS haplogroup_character_state CASCADE;
-
--- Restore FK on haplogroup_variant (will need manual data restoration); the ADD fails if existing variant_id values have no matching row in variant
-ALTER TABLE tree.haplogroup_variant DROP CONSTRAINT IF EXISTS haplogroup_variant_variant_id_fkey;
-ALTER TABLE tree.haplogroup_variant
-ADD CONSTRAINT haplogroup_variant_variant_id_fkey
-FOREIGN KEY (variant_id) REFERENCES variant(variant_id) ON DELETE CASCADE;
-
-DROP TABLE IF EXISTS variant_v2 CASCADE;
diff --git a/conf/evolutions/default/54.sql b/conf/evolutions/default/54.sql
deleted file mode 100644
index 644c7bfa..00000000
--- a/conf/evolutions/default/54.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-# --- !Ups
-
--- Sequence for DecodingUs variant names (starts at 1; skipped if the sequence already exists)
-CREATE SEQUENCE IF NOT EXISTS du_variant_name_seq START WITH 1;
-
-COMMENT ON SEQUENCE du_variant_name_seq IS 'Sequence for DecodingUs (DU) variant naming authority';
-
-# --- !Downs
--- NOTE(review): the three functions below are not created by this file's Ups; IF EXISTS makes their drops harmless when absent.
-DROP FUNCTION IF EXISTS is_du_name(TEXT);
-DROP FUNCTION IF EXISTS current_du_name();
-DROP FUNCTION IF EXISTS next_du_name();
-DROP SEQUENCE IF EXISTS du_variant_name_seq;
diff --git a/conf/evolutions/default/55.sql b/conf/evolutions/default/55.sql
deleted file mode 100644
index 58236a1b..00000000
--- a/conf/evolutions/default/55.sql
+++ /dev/null
@@ -1,45 +0,0 @@
-# --- !Ups
--- Drop the per-contig genome_region and cytoband tables (their rows are lost) and create genome_region_v2, which stores coordinates as JSONB.
-DROP TABLE IF EXISTS genome_region;
-DROP TABLE IF EXISTS cytoband;
-
-CREATE TABLE genome_region_v2 (
- region_id SERIAL PRIMARY KEY,
- region_type TEXT NOT NULL,
- name TEXT,
- coordinates JSONB NOT NULL,
- properties JSONB DEFAULT '{}',
- UNIQUE(region_type, name) -- name is nullable and NULLs never conflict, so unnamed regions of one type are not deduplicated
-);
-
-CREATE INDEX idx_genome_region_v2_coords ON genome_region_v2 USING GIN(coordinates);
-
--- Efficient lookup: "What region contains GRCh38:chrY:15000000?" (expression index on the GRCh38 entry only; other builds are not covered)
-CREATE INDEX idx_genome_region_v2_grch38_range ON genome_region_v2 (
- (coordinates->'GRCh38'->>'contig'),
- ((coordinates->'GRCh38'->>'start')::bigint),
- ((coordinates->'GRCh38'->>'end')::bigint)
-);
-
-# --- !Downs
-
-DROP TABLE IF EXISTS genome_region_v2;
--- Structure-only restore: no foreign key to genbank_contig, no UNIQUE constraints and no contig indexes are recreated, and the column types are TEXT/NUMERIC rather than the earlier VARCHAR/DECIMAL.
-CREATE TABLE genome_region (
- id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL,
- region_type TEXT NOT NULL,
- name TEXT,
- start_pos BIGINT NOT NULL,
- end_pos BIGINT NOT NULL,
- modifier NUMERIC
-);
-
-CREATE TABLE cytoband (
- id SERIAL PRIMARY KEY,
- genbank_contig_id INT NOT NULL,
- name TEXT NOT NULL,
- start_pos BIGINT NOT NULL,
- end_pos BIGINT NOT NULL,
- stain TEXT NOT NULL
-);
diff --git a/conf/evolutions/default/56.sql b/conf/evolutions/default/56.sql
deleted file mode 100644
index cd9439f9..00000000
--- a/conf/evolutions/default/56.sql
+++ /dev/null
@@ -1,12 +0,0 @@
-# --- !Ups
--- Add a JSONB annotations column to variant_v2 (defaults to an empty object) plus a GIN index for querying it.
-ALTER TABLE variant_v2 ADD COLUMN annotations JSONB DEFAULT '{}'::jsonb;
-
-COMMENT ON COLUMN variant_v2.annotations IS 'Computed region overlaps (e.g., Cytobands, PAR, STR overlaps). Managed by background jobs.';
-
-CREATE INDEX idx_variant_v2_annotations ON variant_v2 USING GIN(annotations);
-
-# --- !Downs
--- Both drops use IF EXISTS so the rollback can be re-run after a partial revert.
-DROP INDEX IF EXISTS idx_variant_v2_annotations;
-ALTER TABLE variant_v2 DROP COLUMN IF EXISTS annotations;
diff --git a/conf/evolutions/default/57.sql b/conf/evolutions/default/57.sql
deleted file mode 100644
index 437863e3..00000000
--- a/conf/evolutions/default/57.sql
+++ /dev/null
@@ -1,207 +0,0 @@
-# --- !Ups
-
--- ============================================================================
--- Evolution 57: Tree Versioning System
--- ============================================================================
--- Introduces Production/WIP tree versioning for bulk merge operations.
--- Change sets track groups of changes from external sources (ISOGG, ytree.net).
--- Individual changes are recorded for curator review before promotion.
--- ============================================================================
-
--- Change set status enum (type of tree.change_set.status; new change sets default to DRAFT)
-CREATE TYPE tree.change_set_status AS ENUM (
- 'DRAFT', -- Being built (merge in progress)
- 'READY_FOR_REVIEW', -- Merge complete, awaiting curator
- 'UNDER_REVIEW', -- Curator actively reviewing
- 'APPLIED', -- Changes applied to Production
- 'DISCARDED' -- Changes abandoned
-);
-
--- Tree change type enum (type of tree.tree_change.change_type)
-CREATE TYPE tree.tree_change_type AS ENUM (
- 'CREATE', -- New haplogroup created
- 'UPDATE', -- Haplogroup metadata updated
- 'DELETE', -- Haplogroup deleted (soft)
- 'REPARENT', -- Parent relationship changed
- 'ADD_VARIANT', -- Variant associated with haplogroup
- 'REMOVE_VARIANT' -- Variant disassociated from haplogroup
-);
-
--- Change status enum (type of tree.tree_change.status; new changes default to PENDING)
-CREATE TYPE tree.change_status AS ENUM (
- 'PENDING', -- Not yet applied
- 'APPLIED', -- Successfully applied to Production
- 'REVERTED', -- Undone by curator
- 'SKIPPED' -- Excluded from promotion by curator
-);
-
--- ============================================================================
--- Change Sets: Groups of related changes from a single merge operation
--- ============================================================================
-
-CREATE TABLE tree.change_set (
- id SERIAL PRIMARY KEY,
- haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')),
- name VARCHAR(100) NOT NULL,
- description TEXT,
- source_name VARCHAR(100) NOT NULL,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(), -- TIMESTAMP without time zone, as are the other timestamps in this table
- created_by VARCHAR(255) NOT NULL,
- finalized_at TIMESTAMP,
- applied_at TIMESTAMP,
- applied_by VARCHAR(255),
- discarded_at TIMESTAMP,
- discarded_by VARCHAR(255),
- discard_reason TEXT,
- status tree.change_set_status NOT NULL DEFAULT 'DRAFT',
-
- -- Statistics snapshot from merge (all counters default to 0)
- nodes_processed INTEGER DEFAULT 0,
- nodes_created INTEGER DEFAULT 0,
- nodes_updated INTEGER DEFAULT 0,
- nodes_unchanged INTEGER DEFAULT 0,
- variants_added INTEGER DEFAULT 0,
- relationships_created INTEGER DEFAULT 0,
- relationships_updated INTEGER DEFAULT 0,
- split_operations INTEGER DEFAULT 0,
- ambiguity_count INTEGER DEFAULT 0,
-
- -- Path to generated ambiguity report
- ambiguity_report_path VARCHAR(500),
-
- -- Additional metadata
- metadata JSONB DEFAULT '{}',
-
- UNIQUE(haplogroup_type, name) -- change set names are unique within a haplogroup type
-);
-
-CREATE INDEX idx_change_set_type ON tree.change_set(haplogroup_type);
-CREATE INDEX idx_change_set_status ON tree.change_set(status);
-CREATE INDEX idx_change_set_source ON tree.change_set(source_name);
-CREATE INDEX idx_change_set_created ON tree.change_set(created_at);
-
--- ============================================================================
--- Tree Changes: Individual changes within a change set
--- ============================================================================
-
-CREATE TABLE tree.tree_change (
- id SERIAL PRIMARY KEY,
- change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE,
- change_type tree.tree_change_type NOT NULL,
-
- -- Target identification (for UPDATE/DELETE/REPARENT); this and the other haplogroup FKs below have no ON DELETE action, so a referenced haplogroup cannot be deleted
- haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id),
-
- -- For variant operations. NOTE(review): this references the legacy variant table, not variant_v2 - confirm that is intended
- variant_id INTEGER REFERENCES variant(variant_id),
-
- -- Parent tracking (for CREATE and REPARENT)
- old_parent_id INTEGER REFERENCES tree.haplogroup(haplogroup_id),
- new_parent_id INTEGER REFERENCES tree.haplogroup(haplogroup_id),
-
- -- Full data snapshots (JSONB for flexibility)
- haplogroup_data JSONB, -- Full haplogroup for CREATE, new values for UPDATE
- old_data JSONB, -- Previous state for UPDATE (audit trail)
-
- -- For newly created haplogroups, track the assigned ID after apply
- created_haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id),
-
- -- Ordering and status (sequence_num is not constrained to be unique within a change set)
- sequence_num INTEGER NOT NULL,
- status tree.change_status NOT NULL DEFAULT 'PENDING',
-
- -- Curator review
- reviewed_at TIMESTAMP,
- reviewed_by VARCHAR(255),
- review_notes TEXT,
-
- -- Timestamps
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- applied_at TIMESTAMP,
-
- -- Ambiguity reference (if this change relates to an ambiguous placement)
- ambiguity_type VARCHAR(50),
- ambiguity_confidence DOUBLE PRECISION
-);
-
-CREATE INDEX idx_tree_change_set ON tree.tree_change(change_set_id);
-CREATE INDEX idx_tree_change_hg ON tree.tree_change(haplogroup_id);
-CREATE INDEX idx_tree_change_type ON tree.tree_change(change_type);
-CREATE INDEX idx_tree_change_status ON tree.tree_change(status);
-CREATE INDEX idx_tree_change_seq ON tree.tree_change(change_set_id, sequence_num);
-
--- ============================================================================
--- Change Set Comments: Discussion thread for curator collaboration
--- ============================================================================
--- A comment always belongs to a change set and may optionally point at one change; it is deleted with either parent.
-CREATE TABLE tree.change_set_comment (
- id SERIAL PRIMARY KEY,
- change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE,
- tree_change_id INTEGER REFERENCES tree.tree_change(id) ON DELETE CASCADE, -- NULL for comments on the change set as a whole
- author VARCHAR(255) NOT NULL,
- content TEXT NOT NULL,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP
-);
-
-CREATE INDEX idx_change_set_comment_set ON tree.change_set_comment(change_set_id);
-CREATE INDEX idx_change_set_comment_change ON tree.change_set_comment(tree_change_id);
-
--- ============================================================================
--- Views for easy querying
--- ============================================================================
-
--- Active (non-applied, non-discarded) change sets, i.e. status DRAFT, READY_FOR_REVIEW or UNDER_REVIEW
-CREATE VIEW tree.active_change_sets AS
-SELECT *
-FROM tree.change_set
-WHERE status NOT IN ('APPLIED', 'DISCARDED');
-
--- Change set summary with review progress: one row per change set; the LEFT JOIN keeps sets that have no changes (their counts are 0)
-CREATE VIEW tree.change_set_summary AS
-SELECT
- cs.id,
- cs.haplogroup_type,
- cs.name,
- cs.source_name,
- cs.status,
- cs.created_at,
- cs.created_by,
- cs.nodes_created,
- cs.nodes_updated,
- cs.ambiguity_count,
- COUNT(tc.id) AS total_changes,
- COUNT(tc.id) FILTER (WHERE tc.status = 'PENDING') AS pending_changes,
- COUNT(tc.id) FILTER (WHERE tc.status = 'APPLIED') AS applied_changes,
- COUNT(tc.id) FILTER (WHERE tc.status = 'SKIPPED') AS skipped_changes,
- COUNT(tc.id) FILTER (WHERE tc.reviewed_at IS NOT NULL) AS reviewed_changes
-FROM tree.change_set cs
-LEFT JOIN tree.tree_change tc ON tc.change_set_id = cs.id
-GROUP BY cs.id;
-
--- Pending changes requiring review (high priority = low confidence): lowest ambiguity_confidence first, changes without one last
-CREATE VIEW tree.pending_review_changes AS
-SELECT
- tc.*,
- cs.name AS change_set_name,
- cs.source_name,
- h.name AS haplogroup_name
-FROM tree.tree_change tc
-JOIN tree.change_set cs ON cs.id = tc.change_set_id
-LEFT JOIN tree.haplogroup h ON h.haplogroup_id = tc.haplogroup_id
-WHERE tc.status = 'PENDING'
- AND cs.status IN ('READY_FOR_REVIEW', 'UNDER_REVIEW')
-ORDER BY tc.ambiguity_confidence ASC NULLS LAST, tc.sequence_num;
-
-
-# --- !Downs
--- Drop in dependency order: views, then tables (comment, change, change set), then the enum types the tables use.
-DROP VIEW IF EXISTS tree.pending_review_changes;
-DROP VIEW IF EXISTS tree.change_set_summary;
-DROP VIEW IF EXISTS tree.active_change_sets;
-DROP TABLE IF EXISTS tree.change_set_comment;
-DROP TABLE IF EXISTS tree.tree_change;
-DROP TABLE IF EXISTS tree.change_set;
-DROP TYPE IF EXISTS tree.change_status;
-DROP TYPE IF EXISTS tree.tree_change_type;
-DROP TYPE IF EXISTS tree.change_set_status;
diff --git a/conf/evolutions/default/58.sql b/conf/evolutions/default/58.sql
deleted file mode 100644
index f1900141..00000000
--- a/conf/evolutions/default/58.sql
+++ /dev/null
@@ -1,38 +0,0 @@
--- !Ups
-
--- Tree versioning curator permissions (view, review, promote, discard); names that already exist are skipped
-INSERT INTO auth.permissions (id, name, description, created_at, updated_at) VALUES
- (gen_random_uuid(), 'tree.version.view', 'View change sets and diffs from tree merge operations', NOW(), NOW()),
- (gen_random_uuid(), 'tree.version.review', 'Review and approve/reject individual changes', NOW(), NOW()),
- (gen_random_uuid(), 'tree.version.promote', 'Apply approved change sets to production', NOW(), NOW()),
- (gen_random_uuid(), 'tree.version.discard', 'Discard change sets', NOW(), NOW())
-ON CONFLICT (name) DO NOTHING;
-
--- Grant to TreeCurator role: pairs the role row with each of the four permissions; inserts nothing if the role is absent, and existing grants are skipped
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id FROM auth.roles r, auth.permissions p
-WHERE r.name = 'TreeCurator'
- AND p.name IN ('tree.version.view', 'tree.version.review', 'tree.version.promote', 'tree.version.discard')
-ON CONFLICT DO NOTHING;
-
--- Grant to Curator role (same four permissions)
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id FROM auth.roles r, auth.permissions p
-WHERE r.name = 'Curator'
- AND p.name IN ('tree.version.view', 'tree.version.review', 'tree.version.promote', 'tree.version.discard')
-ON CONFLICT DO NOTHING;
-
--- Grant to Admin role (same four permissions)
-INSERT INTO auth.role_permissions (role_id, permission_id)
-SELECT r.id, p.id FROM auth.roles r, auth.permissions p
-WHERE r.name = 'Admin'
- AND p.name IN ('tree.version.view', 'tree.version.review', 'tree.version.promote', 'tree.version.discard')
-ON CONFLICT DO NOTHING;
-
--- !Downs
--- Remove the role grants that point at tree.version.* permissions, then the permissions themselves.
-DELETE FROM auth.role_permissions
-WHERE permission_id IN (SELECT id FROM auth.permissions WHERE name LIKE 'tree.version.%');
-
-DELETE FROM auth.permissions
-WHERE name LIKE 'tree.version.%';
diff --git a/conf/evolutions/default/59.sql b/conf/evolutions/default/59.sql
deleted file mode 100644
index 16d84ba1..00000000
--- a/conf/evolutions/default/59.sql
+++ /dev/null
@@ -1,144 +0,0 @@
--- !Ups
-
--- Shadow/WIP tables for staging tree changes before production apply
--- Each table is scoped by change_set_id (FK with ON DELETE CASCADE), so deleting a change set removes its staged rows
-
--- WIP haplogroups - staged nodes not yet in production
-CREATE TABLE tree.wip_haplogroup (
- wip_haplogroup_id SERIAL PRIMARY KEY,
- change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE,
-
- -- Placeholder ID used during merge (negative numbers to avoid collision; the sign is not enforced by a constraint)
- placeholder_id INTEGER NOT NULL,
-
- -- Haplogroup data (mirrors tree.haplogroup structure)
- name VARCHAR(255) NOT NULL,
- lineage VARCHAR(255),
- description TEXT,
- haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')),
- source VARCHAR(255) NOT NULL,
- confidence_level VARCHAR(255) NOT NULL DEFAULT 'medium',
-
- -- Age estimates (all optional)
- formed_ybp INTEGER,
- formed_ybp_lower INTEGER,
- formed_ybp_upper INTEGER,
- tmrca_ybp INTEGER,
- tmrca_ybp_lower INTEGER,
- tmrca_ybp_upper INTEGER,
- age_estimate_source VARCHAR(255),
-
- -- Provenance tracking
- provenance JSONB,
-
- -- Timestamps
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
-
- -- Unique within a change set: both the placeholder ID and the name
- UNIQUE (change_set_id, placeholder_id),
- UNIQUE (change_set_id, name)
-);
-
-CREATE INDEX idx_wip_haplogroup_change_set ON tree.wip_haplogroup(change_set_id);
-CREATE INDEX idx_wip_haplogroup_name ON tree.wip_haplogroup(name);
-
--- WIP relationships - staged parent-child relationships
--- Can reference either production haplogroups (by real ID) or WIP haplogroups (by placeholder ID); neither kind of reference is backed by a foreign key
-CREATE TABLE tree.wip_haplogroup_relationship (
- wip_relationship_id SERIAL PRIMARY KEY,
- change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE,
-
- -- Child reference: either a real haplogroup ID or a placeholder (negative) ID
- child_haplogroup_id INTEGER, -- NULL if child is a WIP node
- child_placeholder_id INTEGER, -- NULL if child is a production node
-
- -- Parent reference: either a real haplogroup ID or a placeholder (negative) ID
- parent_haplogroup_id INTEGER, -- NULL if parent is a WIP node
- parent_placeholder_id INTEGER, -- NULL if parent is a production node
-
- -- Metadata
- source VARCHAR(255) NOT NULL,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
-
- -- Constraints: for the child and for the parent, exactly one of (real ID, placeholder ID) must be set
- CHECK (
- (child_haplogroup_id IS NOT NULL AND child_placeholder_id IS NULL) OR
- (child_haplogroup_id IS NULL AND child_placeholder_id IS NOT NULL)
- ),
- CHECK (
- (parent_haplogroup_id IS NOT NULL AND parent_placeholder_id IS NULL) OR
- (parent_haplogroup_id IS NULL AND parent_placeholder_id IS NOT NULL)
- )
-);
-
-CREATE INDEX idx_wip_relationship_change_set ON tree.wip_haplogroup_relationship(change_set_id);
-CREATE INDEX idx_wip_relationship_child ON tree.wip_haplogroup_relationship(child_haplogroup_id);
-CREATE INDEX idx_wip_relationship_parent ON tree.wip_haplogroup_relationship(parent_haplogroup_id);
-
--- WIP variant associations - staged variant links
-CREATE TABLE tree.wip_haplogroup_variant (
- wip_haplogroup_variant_id SERIAL PRIMARY KEY,
- change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE,
-
- -- Haplogroup reference: either real ID or placeholder
- haplogroup_id INTEGER, -- NULL if haplogroup is a WIP node
- haplogroup_placeholder_id INTEGER, -- NULL if haplogroup is a production node
-
- -- Variant reference (always a real variant ID from genomics.variant_v2). No FK enforces this. NOTE(review): evolution 53 creates variant_v2 without a schema qualifier - confirm the genomics schema.
- variant_id INTEGER NOT NULL,
-
- -- Metadata
- source VARCHAR(255),
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
-
- -- Constraints: exactly one of (real ID, placeholder ID) must be set
- CHECK (
- (haplogroup_id IS NOT NULL AND haplogroup_placeholder_id IS NULL) OR
- (haplogroup_id IS NULL AND haplogroup_placeholder_id IS NOT NULL)
- ),
-
- -- Unique variant per haplogroup within a change set; NULLs never conflict, so the first constraint covers production nodes and the second WIP nodes
- UNIQUE (change_set_id, haplogroup_id, variant_id),
- UNIQUE (change_set_id, haplogroup_placeholder_id, variant_id)
-);
-
-CREATE INDEX idx_wip_variant_change_set ON tree.wip_haplogroup_variant(change_set_id);
-CREATE INDEX idx_wip_variant_haplogroup ON tree.wip_haplogroup_variant(haplogroup_id);
-
--- WIP reparent operations - tracks existing nodes that should be moved
-CREATE TABLE tree.wip_reparent (
- wip_reparent_id SERIAL PRIMARY KEY,
- change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE,
-
- -- The existing production haplogroup to reparent (plain INTEGER; no FK to tree.haplogroup)
- haplogroup_id INTEGER NOT NULL,
-
- -- Current parent in production (for rollback reference)
- old_parent_id INTEGER,
-
- -- New parent: either real ID or placeholder
- new_parent_id INTEGER, -- NULL if new parent is a WIP node
- new_parent_placeholder_id INTEGER, -- NULL if new parent is a production node
-
- -- Metadata
- source VARCHAR(255) NOT NULL,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
-
- -- Constraints: exactly one of (real ID, placeholder ID) must be set for the new parent
- CHECK (
- (new_parent_id IS NOT NULL AND new_parent_placeholder_id IS NULL) OR
- (new_parent_id IS NULL AND new_parent_placeholder_id IS NOT NULL)
- ),
-
- -- Only one reparent per haplogroup per change set
- UNIQUE (change_set_id, haplogroup_id)
-);
-
-CREATE INDEX idx_wip_reparent_change_set ON tree.wip_reparent(change_set_id);
-
--- !Downs
--- Drop the WIP staging tables; their only foreign keys point at tree.change_set, not at each other.
-DROP TABLE IF EXISTS tree.wip_reparent;
-DROP TABLE IF EXISTS tree.wip_haplogroup_variant;
-DROP TABLE IF EXISTS tree.wip_haplogroup_relationship;
-DROP TABLE IF EXISTS tree.wip_haplogroup;
diff --git a/conf/evolutions/default/6.sql b/conf/evolutions/default/6.sql
deleted file mode 100644
index 24f5e6a1..00000000
--- a/conf/evolutions/default/6.sql
+++ /dev/null
@@ -1,189 +0,0 @@
-# --- !Ups
---- Add tables for Authentication and Authorization
-CREATE SCHEMA auth;
-CREATE EXTENSION IF NOT EXISTS citext;
-
--- Schema: public
--- Users Table
-CREATE TABLE public.users
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- email CITEXT UNIQUE,
- did VARCHAR(255) UNIQUE NOT NULL,
- handle VARCHAR(255) UNIQUE,
- display_name VARCHAR(255),
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- is_active BOOLEAN NOT NULL DEFAULT TRUE
-);
-
--- User PDS Information
-CREATE TABLE public.user_pds_info
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- user_id UUID UNIQUE NOT NULL,
- pds_url VARCHAR(255) NOT NULL,
- did VARCHAR(255) UNIQUE NOT NULL,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_user_pds_info_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE,
- CONSTRAINT fk_user_pds_info_did FOREIGN KEY (did) REFERENCES public.users (did) ON DELETE CASCADE
-);
-
--- Reputation Event Types Table
-CREATE TABLE public.reputation_event_types
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- name VARCHAR(100) UNIQUE NOT NULL,
- description TEXT,
- default_points_change INTEGER NOT NULL,
- is_positive BOOLEAN NOT NULL,
- is_system_generated BOOLEAN NOT NULL DEFAULT FALSE,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Reputation Events Log Table
-CREATE TABLE public.reputation_events
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- user_id UUID NOT NULL,
- event_type_id UUID NOT NULL,
- actual_points_change INTEGER NOT NULL,
- source_user_id UUID, -- NULL if system-generated
- related_entity_type VARCHAR(50),
- related_entity_id UUID, -- For specific post/comment/etc.
- notes TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_reputation_events_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE,
- CONSTRAINT fk_reputation_events_event_type_id FOREIGN KEY (event_type_id) REFERENCES public.reputation_event_types (id) ON DELETE RESTRICT, -- RESTRICT to prevent deleting event types that are referenced
- CONSTRAINT fk_reputation_events_source_user_id FOREIGN KEY (source_user_id) REFERENCES public.users (id) ON DELETE SET NULL -- Set to NULL if source user is deleted
-);
-
--- User Reputation Scores Table (Aggregated Score)
-CREATE TABLE public.user_reputation_scores
-(
- user_id UUID PRIMARY KEY,
- score BIGINT NOT NULL DEFAULT 0,
- last_calculated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_user_reputation_scores_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE
-);
-
--- Schema: auth
-
--- User Login Info Table
-CREATE TABLE auth.user_login_info
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- user_id UUID NOT NULL, -- Links to public.users
- provider_id VARCHAR(255) NOT NULL,
- provider_key VARCHAR(255) NOT NULL,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CONSTRAINT fk_auth_user_login_info_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE,
- CONSTRAINT uq_auth_provider_id_key UNIQUE (provider_id, provider_key)
-);
-
--- User OAuth2 Info Table (for storing tokens)
-CREATE TABLE auth.user_oauth2_info
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- login_info_id UUID UNIQUE NOT NULL,
- access_token TEXT NOT NULL,
- token_type VARCHAR(50),
- expires_in BIGINT,
- refresh_token TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- scope TEXT,
- CONSTRAINT fk_auth_user_oauth2_info_login_info_id FOREIGN KEY (login_info_id) REFERENCES auth.user_login_info (id) ON DELETE CASCADE
-);
-
--- Roles Table
-CREATE TABLE auth.roles
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- name VARCHAR(255) UNIQUE NOT NULL,
- description TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Permissions Table
-CREATE TABLE auth.permissions
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- name VARCHAR(255) UNIQUE NOT NULL,
- description TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Role Permissions Table (Many-to-Many)
-CREATE TABLE auth.role_permissions
-(
- role_id UUID NOT NULL,
- permission_id UUID NOT NULL,
- PRIMARY KEY (role_id, permission_id),
- CONSTRAINT fk_auth_role_permissions_role_id FOREIGN KEY (role_id) REFERENCES auth.roles (id) ON DELETE CASCADE,
- CONSTRAINT fk_auth_role_permissions_permission_id FOREIGN KEY (permission_id) REFERENCES auth.permissions (id) ON DELETE CASCADE
-);
-
--- User Roles Table (Many-to-Many)
-CREATE TABLE auth.user_roles
-(
- user_id UUID NOT NULL, -- Links to public.users
- role_id UUID NOT NULL,
- PRIMARY KEY (user_id, role_id),
- CONSTRAINT fk_auth_user_roles_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE,
- CONSTRAINT fk_auth_user_roles_role_id FOREIGN KEY (role_id) REFERENCES auth.roles (id) ON DELETE CASCADE
-);
-
--- AT Protocol Authorization Servers
-CREATE TABLE auth.atprotocol_authorization_servers
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- issuer_url VARCHAR(255) UNIQUE NOT NULL,
- authorization_endpoint VARCHAR(255),
- token_endpoint VARCHAR(255),
- pushed_authorization_request_endpoint VARCHAR(255),
- dpop_signing_alg_values_supported TEXT,
- scopes_supported TEXT,
- client_id_metadata_document_supported BOOLEAN,
- metadata_fetched_at TIMESTAMP NOT NULL DEFAULT NOW(),
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- AT Protocol Client Metadata
-CREATE TABLE auth.atprotocol_client_metadata
-(
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- client_id_url VARCHAR(255) UNIQUE NOT NULL,
- client_name VARCHAR(255),
- client_uri VARCHAR(255),
- logo_uri VARCHAR(255),
- tos_uri VARCHAR(255),
- policy_uri VARCHAR(255),
- redirect_uris TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
-# --- !Downs
-
-DROP TABLE auth.atprotocol_client_metadata;
-DROP TABLE auth.atprotocol_authorization_servers;
-DROP TABLE auth.user_roles;
-DROP TABLE auth.role_permissions;
-DROP TABLE auth.permissions;
-DROP TABLE auth.roles;
-DROP TABLE auth.user_oauth2_info;
-DROP TABLE auth.user_login_info;
-DROP TABLE public.user_reputation_scores;
-DROP TABLE public.reputation_events;
-DROP TABLE public.reputation_event_types;
-DROP TABLE public.user_pds_info;
-DROP TABLE public.users;
-
-DROP SCHEMA auth;
\ No newline at end of file
diff --git a/conf/evolutions/default/60.sql b/conf/evolutions/default/60.sql
deleted file mode 100644
index c02849e9..00000000
--- a/conf/evolutions/default/60.sql
+++ /dev/null
@@ -1,65 +0,0 @@
--- !Ups
-
--- Curator conflict resolution table
--- Allows curators to correct/override merge algorithm decisions before applying
-
-CREATE TABLE tree.wip_resolution (
- resolution_id SERIAL PRIMARY KEY,
- change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE,
-
- -- What we're resolving (at least one must be set)
- wip_haplogroup_id INTEGER REFERENCES tree.wip_haplogroup(wip_haplogroup_id) ON DELETE CASCADE,
- wip_reparent_id INTEGER REFERENCES tree.wip_reparent(wip_reparent_id) ON DELETE CASCADE,
-
- -- Resolution type
- resolution_type VARCHAR(50) NOT NULL
- CHECK (resolution_type IN ('REPARENT', 'EDIT_VARIANTS', 'MERGE_EXISTING', 'DEFER')),
-
- -- REPARENT: Change the parent of a node
- new_parent_id INTEGER, -- Production haplogroup ID
- new_parent_placeholder_id INTEGER, -- WIP haplogroup placeholder ID
-
- -- MERGE_EXISTING: Map WIP node to existing production node (don't create)
- merge_target_id INTEGER, -- Production haplogroup to merge into
-
- -- EDIT_VARIANTS: Add or remove variant associations
- variants_to_add JSONB DEFAULT '[]', -- Array of variant IDs to add
- variants_to_remove JSONB DEFAULT '[]', -- Array of variant IDs to remove
-
- -- DEFER: Move to manual review queue
- defer_reason TEXT,
- defer_priority VARCHAR(20) DEFAULT 'NORMAL'
- CHECK (defer_priority IN ('LOW', 'NORMAL', 'HIGH', 'CRITICAL')),
-
- -- Curator tracking
- curator_id VARCHAR(100) NOT NULL,
- curator_notes TEXT,
-
- -- Status tracking
- status VARCHAR(20) NOT NULL DEFAULT 'PENDING'
- CHECK (status IN ('PENDING', 'APPLIED', 'CANCELLED')),
-
- -- Timestamps
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- applied_at TIMESTAMP,
-
- -- At least one target must be specified
- CONSTRAINT wip_resolution_has_target CHECK (
- wip_haplogroup_id IS NOT NULL OR
- wip_reparent_id IS NOT NULL
- )
-);
-
-CREATE INDEX idx_wip_resolution_change_set ON tree.wip_resolution(change_set_id);
-CREATE INDEX idx_wip_resolution_status ON tree.wip_resolution(status);
-CREATE INDEX idx_wip_resolution_type ON tree.wip_resolution(resolution_type);
-CREATE INDEX idx_wip_resolution_wip_haplogroup ON tree.wip_resolution(wip_haplogroup_id) WHERE wip_haplogroup_id IS NOT NULL;
-CREATE INDEX idx_wip_resolution_wip_reparent ON tree.wip_resolution(wip_reparent_id) WHERE wip_reparent_id IS NOT NULL;
-
--- Add comment explaining the table
-COMMENT ON TABLE tree.wip_resolution IS 'Curator corrections to merge algorithm decisions. Applied during change set promotion.';
-COMMENT ON COLUMN tree.wip_resolution.resolution_type IS 'REPARENT=change parent, EDIT_VARIANTS=add/remove SNPs, MERGE_EXISTING=map to existing node, DEFER=needs manual review';
-
--- !Downs
-
-DROP TABLE IF EXISTS tree.wip_resolution;
diff --git a/conf/evolutions/default/61.sql b/conf/evolutions/default/61.sql
deleted file mode 100644
index 75c5acb8..00000000
--- a/conf/evolutions/default/61.sql
+++ /dev/null
@@ -1,146 +0,0 @@
--- !Ups
-
--- ============================================================================
--- Evolution 61: Haplogroup Discovery System Tables
--- Creates the discovery pipeline tables in the tree schema for tracking
--- private variants, proposed branches, evidence, and curator actions.
--- ============================================================================
-
--- Private variants discovered in biosamples (unified across both Citizen and External)
-CREATE TABLE tree.biosample_private_variant (
- id SERIAL PRIMARY KEY,
- sample_type VARCHAR(20) NOT NULL CHECK (sample_type IN ('CITIZEN', 'EXTERNAL')),
- sample_id INTEGER NOT NULL,
- sample_guid UUID NOT NULL,
- variant_id INTEGER NOT NULL,
- haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')),
- terminal_haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id),
- discovered_at TIMESTAMP NOT NULL DEFAULT NOW(),
- status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE'
- CHECK (status IN ('ACTIVE', 'PROMOTED', 'INVALIDATED')),
- UNIQUE(sample_type, sample_id, variant_id, haplogroup_type)
-);
-
-CREATE INDEX idx_bpv_sample ON tree.biosample_private_variant(sample_type, sample_id);
-CREATE INDEX idx_bpv_guid ON tree.biosample_private_variant(sample_guid);
-CREATE INDEX idx_bpv_variant ON tree.biosample_private_variant(variant_id);
-CREATE INDEX idx_bpv_terminal ON tree.biosample_private_variant(terminal_haplogroup_id);
-CREATE INDEX idx_bpv_status ON tree.biosample_private_variant(status);
-
-COMMENT ON TABLE tree.biosample_private_variant IS 'Tracks private (mismatching) variants discovered in biosamples that extend beyond the current terminal haplogroup.';
-
--- Proposed branches awaiting consensus/review
-CREATE TABLE tree.proposed_branch (
- id SERIAL PRIMARY KEY,
- parent_haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id),
- proposed_name VARCHAR(100),
- haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')),
- status VARCHAR(20) NOT NULL DEFAULT 'PENDING'
- CHECK (status IN ('PENDING', 'READY_FOR_REVIEW', 'UNDER_REVIEW',
- 'ACCEPTED', 'PROMOTED', 'REJECTED', 'SPLIT')),
- consensus_count INTEGER NOT NULL DEFAULT 0,
- confidence_score DOUBLE PRECISION NOT NULL DEFAULT 0.0,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- reviewed_at TIMESTAMP,
- reviewed_by VARCHAR(255),
- notes TEXT,
- promoted_haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id)
-);
-
-CREATE INDEX idx_pb_parent ON tree.proposed_branch(parent_haplogroup_id);
-CREATE INDEX idx_pb_status ON tree.proposed_branch(status);
-CREATE INDEX idx_pb_type ON tree.proposed_branch(haplogroup_type);
-
-COMMENT ON TABLE tree.proposed_branch IS 'Candidate branches proposed by the discovery system when shared private variants are detected across multiple biosamples.';
-
--- Variants associated with proposed branches
-CREATE TABLE tree.proposed_branch_variant (
- id SERIAL PRIMARY KEY,
- proposed_branch_id INTEGER NOT NULL REFERENCES tree.proposed_branch(id) ON DELETE CASCADE,
- variant_id INTEGER NOT NULL,
- is_defining BOOLEAN NOT NULL DEFAULT TRUE,
- evidence_count INTEGER NOT NULL DEFAULT 1,
- first_observed_at TIMESTAMP NOT NULL DEFAULT NOW(),
- last_observed_at TIMESTAMP NOT NULL DEFAULT NOW(),
- UNIQUE(proposed_branch_id, variant_id)
-);
-
-CREATE INDEX idx_pbv_variant ON tree.proposed_branch_variant(variant_id);
-
-COMMENT ON TABLE tree.proposed_branch_variant IS 'Links proposed branches to their defining variants with evidence tracking.';
-
--- Biosamples supporting proposed branches (unified across both types)
-CREATE TABLE tree.proposed_branch_evidence (
- id SERIAL PRIMARY KEY,
- proposed_branch_id INTEGER NOT NULL REFERENCES tree.proposed_branch(id) ON DELETE CASCADE,
- sample_type VARCHAR(20) NOT NULL CHECK (sample_type IN ('CITIZEN', 'EXTERNAL')),
- sample_id INTEGER NOT NULL,
- sample_guid UUID NOT NULL,
- added_at TIMESTAMP NOT NULL DEFAULT NOW(),
- variant_match_count INTEGER NOT NULL DEFAULT 0,
- variant_mismatch_count INTEGER NOT NULL DEFAULT 0,
- UNIQUE(proposed_branch_id, sample_type, sample_id)
-);
-
-CREATE INDEX idx_pbe_sample ON tree.proposed_branch_evidence(sample_type, sample_id);
-CREATE INDEX idx_pbe_guid ON tree.proposed_branch_evidence(sample_guid);
-
-COMMENT ON TABLE tree.proposed_branch_evidence IS 'Links biosamples (Citizen or External) to the proposed branches they support.';
-
--- Curator audit trail
-CREATE TABLE tree.curator_action (
- id SERIAL PRIMARY KEY,
- curator_id VARCHAR(255) NOT NULL,
- action_type VARCHAR(50) NOT NULL
- CHECK (action_type IN ('REVIEW', 'ACCEPT', 'REJECT', 'MODIFY',
- 'SPLIT', 'MERGE', 'CREATE', 'DELETE',
- 'REASSIGN', 'NAME_VARIANT')),
- target_type VARCHAR(50) NOT NULL
- CHECK (target_type IN ('PROPOSED_BRANCH', 'HAPLOGROUP',
- 'HAPLOGROUP_RELATIONSHIP', 'VARIANT', 'BIOSAMPLE')),
- target_id INTEGER NOT NULL,
- previous_state JSONB,
- new_state JSONB,
- reason TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX idx_ca_curator ON tree.curator_action(curator_id);
-CREATE INDEX idx_ca_timestamp ON tree.curator_action(created_at);
-CREATE INDEX idx_ca_target ON tree.curator_action(target_type, target_id);
-
-COMMENT ON TABLE tree.curator_action IS 'Immutable audit trail of all curator operations on proposed branches, haplogroups, and variants.';
-
--- Configuration for consensus thresholds
-CREATE TABLE tree.discovery_config (
- id SERIAL PRIMARY KEY,
- haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')),
- config_key VARCHAR(100) NOT NULL,
- config_value TEXT NOT NULL,
- description TEXT,
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_by VARCHAR(255),
- UNIQUE(haplogroup_type, config_key)
-);
-
-INSERT INTO tree.discovery_config (haplogroup_type, config_key, config_value, description) VALUES
-('Y', 'consensus_threshold', '3', 'Minimum biosamples required to reach ReadyForReview'),
-('Y', 'auto_promote_threshold', '10', 'Biosamples required for automatic promotion consideration'),
-('Y', 'confidence_threshold', '0.95', 'Minimum confidence score for promotion'),
-('Y', 'jaccard_match_threshold', '0.8', 'Minimum Jaccard similarity to match an existing proposal'),
-('MT', 'consensus_threshold', '3', 'Minimum biosamples required to reach ReadyForReview'),
-('MT', 'auto_promote_threshold', '10', 'Biosamples required for automatic promotion consideration'),
-('MT', 'confidence_threshold', '0.95', 'Minimum confidence score for promotion'),
-('MT', 'jaccard_match_threshold', '0.8', 'Minimum Jaccard similarity to match an existing proposal');
-
-COMMENT ON TABLE tree.discovery_config IS 'Per-haplogroup-type configuration for discovery thresholds and scoring parameters.';
-
--- !Downs
-
-DROP TABLE IF EXISTS tree.discovery_config;
-DROP TABLE IF EXISTS tree.curator_action;
-DROP TABLE IF EXISTS tree.proposed_branch_evidence;
-DROP TABLE IF EXISTS tree.proposed_branch_variant;
-DROP TABLE IF EXISTS tree.proposed_branch;
-DROP TABLE IF EXISTS tree.biosample_private_variant;
diff --git a/conf/evolutions/default/62.sql b/conf/evolutions/default/62.sql
deleted file mode 100644
index a68a03c4..00000000
--- a/conf/evolutions/default/62.sql
+++ /dev/null
@@ -1,30 +0,0 @@
-# --- !Ups
-
--- Per-biosample callable loci storage for accurate mutation rate calculation
--- Uses polymorphic reference pattern (consistent with tree.biosample_private_variant)
-CREATE TABLE genomics.biosample_callable_loci (
- id SERIAL PRIMARY KEY,
- sample_type VARCHAR(20) NOT NULL,
- sample_id INTEGER NOT NULL,
- sample_guid UUID,
- chromosome VARCHAR(20) NOT NULL,
- total_callable_bp BIGINT NOT NULL,
- region_count INTEGER,
- bed_file_hash VARCHAR(64),
- computed_at TIMESTAMP NOT NULL,
- source_test_type_id INTEGER REFERENCES test_type_definition(id),
- y_xdegen_callable_bp BIGINT,
- y_ampliconic_callable_bp BIGINT,
- y_palindromic_callable_bp BIGINT,
- UNIQUE(sample_type, sample_id, chromosome),
- CHECK (sample_type IN ('citizen', 'external'))
-);
-
-CREATE INDEX idx_bcl_sample ON genomics.biosample_callable_loci(sample_type, sample_id);
-CREATE INDEX idx_bcl_guid ON genomics.biosample_callable_loci(sample_guid) WHERE sample_guid IS NOT NULL;
-
-# --- !Downs
-
-DROP INDEX IF EXISTS idx_bcl_guid;
-DROP INDEX IF EXISTS idx_bcl_sample;
-DROP TABLE IF EXISTS genomics.biosample_callable_loci;
diff --git a/conf/evolutions/default/63.sql b/conf/evolutions/default/63.sql
deleted file mode 100644
index 18e1bc96..00000000
--- a/conf/evolutions/default/63.sql
+++ /dev/null
@@ -1,29 +0,0 @@
-# --- !Ups
-
--- Genealogical anchors for historical age constraints on haplogroup branches
--- Supports known MRCAs, most distant known ancestors, and ancient DNA calibration points
-CREATE TABLE tree.genealogical_anchor (
- id SERIAL PRIMARY KEY,
- haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE,
- anchor_type VARCHAR(50) NOT NULL,
- date_ce INTEGER NOT NULL,
- date_uncertainty_years INTEGER,
- confidence NUMERIC(3,2),
- description TEXT,
- source VARCHAR(500),
- carbon_date_bp INTEGER,
- carbon_date_sigma INTEGER,
- created_by VARCHAR(255),
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- CHECK (anchor_type IN ('KNOWN_MRCA', 'MDKA', 'ANCIENT_DNA')),
- CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1))
-);
-
-CREATE INDEX idx_genealogical_anchor_haplogroup ON tree.genealogical_anchor(haplogroup_id);
-CREATE INDEX idx_genealogical_anchor_type ON tree.genealogical_anchor(anchor_type);
-
-# --- !Downs
-
-DROP INDEX IF EXISTS tree.idx_genealogical_anchor_type;
-DROP INDEX IF EXISTS tree.idx_genealogical_anchor_haplogroup;
-DROP TABLE IF EXISTS tree.genealogical_anchor;
diff --git a/conf/evolutions/default/64.sql b/conf/evolutions/default/64.sql
deleted file mode 100644
index 4cc8d88d..00000000
--- a/conf/evolutions/default/64.sql
+++ /dev/null
@@ -1,28 +0,0 @@
-# --- !Ups
-
--- Ancestral STR motifs (modal haplotypes) per haplogroup branch
--- Used for STR-based age estimation and group project modal computations
-CREATE TABLE tree.haplogroup_ancestral_str (
- id SERIAL PRIMARY KEY,
- haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE,
- marker_name VARCHAR(50) NOT NULL,
- ancestral_value INTEGER,
- ancestral_value_alt INTEGER[],
- confidence NUMERIC(3,2),
- supporting_samples INTEGER,
- variance NUMERIC(8,4),
- computed_at TIMESTAMP NOT NULL DEFAULT NOW(),
- method VARCHAR(50) NOT NULL DEFAULT 'MODAL',
- UNIQUE(haplogroup_id, marker_name),
- CHECK (method IN ('MODAL', 'PHYLOGENETIC', 'MANUAL')),
- CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1))
-);
-
-CREATE INDEX idx_hg_ancestral_str_haplogroup ON tree.haplogroup_ancestral_str(haplogroup_id);
-CREATE INDEX idx_hg_ancestral_str_marker ON tree.haplogroup_ancestral_str(marker_name);
-
-# --- !Downs
-
-DROP INDEX IF EXISTS tree.idx_hg_ancestral_str_marker;
-DROP INDEX IF EXISTS tree.idx_hg_ancestral_str_haplogroup;
-DROP TABLE IF EXISTS tree.haplogroup_ancestral_str;
diff --git a/conf/evolutions/default/65.sql b/conf/evolutions/default/65.sql
deleted file mode 100644
index c524da68..00000000
--- a/conf/evolutions/default/65.sql
+++ /dev/null
@@ -1,78 +0,0 @@
--- # --- !Ups
-
--- Phase 2: Consolidate alignment_coverage into alignment_metadata as JSONB
-ALTER TABLE public.alignment_metadata ADD COLUMN coverage JSONB;
-
--- Migrate existing coverage data into the new JSONB column
-UPDATE public.alignment_metadata am
-SET coverage = jsonb_build_object(
- 'meanDepth', ac.mean_depth,
- 'medianDepth', ac.median_depth,
- 'percentCoverageAt1x', ac.percent_coverage_at_1x,
- 'percentCoverageAt5x', ac.percent_coverage_at_5x,
- 'percentCoverageAt10x', ac.percent_coverage_at_10x,
- 'percentCoverageAt20x', ac.percent_coverage_at_20x,
- 'percentCoverageAt30x', ac.percent_coverage_at_30x,
- 'basesNoCoverage', ac.bases_no_coverage,
- 'basesLowQualityMapping', ac.bases_low_quality_mapping,
- 'basesCallable', ac.bases_callable,
- 'meanMappingQuality', ac.mean_mapping_quality
-)
-FROM public.alignment_coverage ac
-WHERE ac.alignment_metadata_id = am.id;
-
--- Expression indexes for aggregation queries on JSONB coverage fields
-CREATE INDEX idx_am_coverage_mean_depth ON public.alignment_metadata (((coverage->>'meanDepth')::double precision)) WHERE coverage IS NOT NULL;
-CREATE INDEX idx_am_coverage_bases_callable ON public.alignment_metadata (((coverage->>'basesCallable')::bigint)) WHERE coverage IS NOT NULL;
-CREATE INDEX idx_am_coverage_mean_mapping_quality ON public.alignment_metadata (((coverage->>'meanMappingQuality')::double precision)) WHERE coverage IS NOT NULL;
-
--- Drop the old table
-DROP TABLE public.alignment_coverage;
-
-
--- # --- !Downs
-
--- Recreate alignment_coverage table
-CREATE TABLE public.alignment_coverage (
- alignment_metadata_id BIGINT PRIMARY KEY REFERENCES alignment_metadata(id) ON DELETE CASCADE,
- mean_depth DOUBLE PRECISION,
- median_depth DOUBLE PRECISION,
- percent_coverage_at_1x DOUBLE PRECISION,
- percent_coverage_at_5x DOUBLE PRECISION,
- percent_coverage_at_10x DOUBLE PRECISION,
- percent_coverage_at_20x DOUBLE PRECISION,
- percent_coverage_at_30x DOUBLE PRECISION,
- bases_no_coverage BIGINT,
- bases_low_quality_mapping BIGINT,
- bases_callable BIGINT,
- mean_mapping_quality DOUBLE PRECISION
-);
-
--- Migrate data back from JSONB to separate table
-INSERT INTO public.alignment_coverage (
- alignment_metadata_id, mean_depth, median_depth,
- percent_coverage_at_1x, percent_coverage_at_5x, percent_coverage_at_10x,
- percent_coverage_at_20x, percent_coverage_at_30x,
- bases_no_coverage, bases_low_quality_mapping, bases_callable,
- mean_mapping_quality
-)
-SELECT id,
- (coverage->>'meanDepth')::double precision,
- (coverage->>'medianDepth')::double precision,
- (coverage->>'percentCoverageAt1x')::double precision,
- (coverage->>'percentCoverageAt5x')::double precision,
- (coverage->>'percentCoverageAt10x')::double precision,
- (coverage->>'percentCoverageAt20x')::double precision,
- (coverage->>'percentCoverageAt30x')::double precision,
- (coverage->>'basesNoCoverage')::bigint,
- (coverage->>'basesLowQualityMapping')::bigint,
- (coverage->>'basesCallable')::bigint,
- (coverage->>'meanMappingQuality')::double precision
-FROM public.alignment_metadata
-WHERE coverage IS NOT NULL;
-
--- Drop indexes and column
-DROP INDEX IF EXISTS idx_am_coverage_mean_depth;
-DROP INDEX IF EXISTS idx_am_coverage_bases_callable;
-DROP INDEX IF EXISTS idx_am_coverage_mean_mapping_quality;
-ALTER TABLE public.alignment_metadata DROP COLUMN coverage;
diff --git a/conf/evolutions/default/66.sql b/conf/evolutions/default/66.sql
deleted file mode 100644
index 759f7c95..00000000
--- a/conf/evolutions/default/66.sql
+++ /dev/null
@@ -1,105 +0,0 @@
--- # --- !Ups
-
--- Phase 3: Consolidate biosample_original_haplogroup into biosample as JSONB array
-ALTER TABLE public.biosample ADD COLUMN original_haplogroups JSONB DEFAULT '[]'::jsonb;
-
--- Migrate existing data from the separate table into the JSONB array column
-UPDATE public.biosample b SET original_haplogroups = (
- SELECT COALESCE(jsonb_agg(jsonb_build_object(
- 'publicationId', boh.publication_id,
- 'yHaplogroupResult', boh.y_haplogroup_result,
- 'mtHaplogroupResult', boh.mt_haplogroup_result,
- 'notes', boh.notes
- ) ORDER BY boh.publication_id), '[]'::jsonb)
- FROM biosample_original_haplogroup boh
- WHERE boh.biosample_id = b.id
-)
-WHERE b.id IN (SELECT DISTINCT biosample_id FROM biosample_original_haplogroup);
-
--- GIN index for containment queries (e.g., finding biosamples by publication_id in array)
-CREATE INDEX idx_biosample_orig_hg ON public.biosample
- USING GIN (original_haplogroups jsonb_path_ops)
- WHERE original_haplogroups != '[]'::jsonb;
-
--- Same for citizen_biosample
-ALTER TABLE public.citizen_biosample ADD COLUMN original_haplogroups JSONB DEFAULT '[]'::jsonb;
-
-UPDATE public.citizen_biosample cb SET original_haplogroups = (
- SELECT COALESCE(jsonb_agg(jsonb_build_object(
- 'publicationId', cboh.publication_id,
- 'yHaplogroupResult', cboh.y_haplogroup_result,
- 'mtHaplogroupResult', cboh.mt_haplogroup_result,
- 'notes', cboh.notes
- ) ORDER BY cboh.publication_id), '[]'::jsonb)
- FROM citizen_biosample_original_haplogroup cboh
- WHERE cboh.citizen_biosample_id = cb.id
-)
-WHERE cb.id IN (SELECT DISTINCT citizen_biosample_id FROM citizen_biosample_original_haplogroup);
-
-CREATE INDEX idx_citizen_biosample_orig_hg ON public.citizen_biosample
- USING GIN (original_haplogroups jsonb_path_ops)
- WHERE original_haplogroups != '[]'::jsonb;
-
--- Drop old tables
-DROP TABLE public.biosample_original_haplogroup;
-DROP TABLE public.citizen_biosample_original_haplogroup;
-
-
--- # --- !Downs
-
--- Recreate biosample_original_haplogroup table
-CREATE TABLE public.biosample_original_haplogroup (
- id SERIAL PRIMARY KEY,
- biosample_id INT REFERENCES biosample (id) ON DELETE CASCADE,
- publication_id INT REFERENCES publication (id) ON DELETE CASCADE,
- original_y_haplogroup VARCHAR(255),
- original_mt_haplogroup VARCHAR(255),
- notes TEXT,
- y_haplogroup_result JSONB,
- mt_haplogroup_result JSONB,
- UNIQUE (biosample_id, publication_id)
-);
-
--- Migrate data back from JSONB array
-INSERT INTO public.biosample_original_haplogroup (
- biosample_id, publication_id, y_haplogroup_result, mt_haplogroup_result, notes
-)
-SELECT b.id,
- (entry->>'publicationId')::int,
- entry->'yHaplogroupResult',
- entry->'mtHaplogroupResult',
- entry->>'notes'
-FROM public.biosample b,
- jsonb_array_elements(b.original_haplogroups) AS entry
-WHERE b.original_haplogroups != '[]'::jsonb;
-
--- Recreate citizen_biosample_original_haplogroup table
-CREATE TABLE public.citizen_biosample_original_haplogroup (
- id SERIAL PRIMARY KEY,
- citizen_biosample_id INT REFERENCES citizen_biosample (id) ON DELETE CASCADE,
- publication_id INT REFERENCES publication (id) ON DELETE CASCADE,
- original_y_haplogroup VARCHAR(255),
- original_mt_haplogroup VARCHAR(255),
- notes TEXT,
- y_haplogroup_result JSONB,
- mt_haplogroup_result JSONB,
- UNIQUE (citizen_biosample_id, publication_id)
-);
-
-INSERT INTO public.citizen_biosample_original_haplogroup (
- citizen_biosample_id, publication_id, y_haplogroup_result, mt_haplogroup_result, notes
-)
-SELECT cb.id,
- (entry->>'publicationId')::int,
- entry->'yHaplogroupResult',
- entry->'mtHaplogroupResult',
- entry->>'notes'
-FROM public.citizen_biosample cb,
- jsonb_array_elements(cb.original_haplogroups) AS entry
-WHERE cb.original_haplogroups != '[]'::jsonb;
-
--- Drop JSONB columns and indexes
-DROP INDEX IF EXISTS idx_biosample_orig_hg;
-DROP INDEX IF EXISTS idx_citizen_biosample_orig_hg;
-ALTER TABLE public.biosample DROP COLUMN original_haplogroups;
-ALTER TABLE public.citizen_biosample DROP COLUMN original_haplogroups;
diff --git a/conf/evolutions/default/67.sql b/conf/evolutions/default/67.sql
deleted file mode 100644
index 7726fc2c..00000000
--- a/conf/evolutions/default/67.sql
+++ /dev/null
@@ -1,28 +0,0 @@
--- # --- !Ups
-
-CREATE TABLE public.instrument_observation (
- id SERIAL PRIMARY KEY,
- at_uri VARCHAR(512) UNIQUE NOT NULL,
- at_cid VARCHAR(128),
- instrument_id VARCHAR(255) NOT NULL,
- lab_name VARCHAR(255) NOT NULL,
- biosample_ref VARCHAR(512) NOT NULL,
- sequence_run_ref VARCHAR(512),
- platform VARCHAR(100),
- instrument_model VARCHAR(255),
- flowcell_id VARCHAR(255),
- run_date TIMESTAMP,
- confidence VARCHAR(20) DEFAULT 'INFERRED' CHECK (confidence IN ('KNOWN', 'INFERRED', 'GUESSED')),
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP
-);
-
-CREATE INDEX idx_instrument_obs_instrument ON public.instrument_observation (instrument_id);
-CREATE INDEX idx_instrument_obs_lab ON public.instrument_observation (lab_name);
-CREATE INDEX idx_instrument_obs_biosample ON public.instrument_observation (biosample_ref);
-CREATE INDEX idx_instrument_obs_at_uri ON public.instrument_observation (at_uri);
-
-
--- # --- !Downs
-
-DROP TABLE IF EXISTS public.instrument_observation;
diff --git a/conf/evolutions/default/68.sql b/conf/evolutions/default/68.sql
deleted file mode 100644
index 9a567e14..00000000
--- a/conf/evolutions/default/68.sql
+++ /dev/null
@@ -1,53 +0,0 @@
-# --- !Ups
-
--- Instrument association proposals for consensus-based lab inference
-CREATE TABLE public.instrument_association_proposal (
- id SERIAL PRIMARY KEY,
- instrument_id VARCHAR(255) NOT NULL,
- proposed_lab_name VARCHAR(255) NOT NULL,
- proposed_manufacturer VARCHAR(255),
- proposed_model VARCHAR(255),
- existing_lab_id INTEGER REFERENCES public.sequencing_lab(id),
- observation_count INTEGER NOT NULL DEFAULT 0,
- distinct_citizen_count INTEGER NOT NULL DEFAULT 0,
- confidence_score DOUBLE PRECISION NOT NULL DEFAULT 0.0,
- earliest_observation TIMESTAMP,
- latest_observation TIMESTAMP,
- status VARCHAR(30) NOT NULL DEFAULT 'PENDING'
- CHECK (status IN ('PENDING', 'READY_FOR_REVIEW', 'UNDER_REVIEW',
- 'ACCEPTED', 'REJECTED', 'SUPERSEDED')),
- reviewed_at TIMESTAMP,
- reviewed_by VARCHAR(255),
- review_notes TEXT,
- accepted_lab_id INTEGER REFERENCES public.sequencing_lab(id),
- accepted_instrument_id INTEGER REFERENCES public.sequencer_instrument(id),
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX idx_iap_status ON instrument_association_proposal(status);
-CREATE INDEX idx_iap_instrument ON instrument_association_proposal(instrument_id);
-CREATE UNIQUE INDEX idx_iap_active_instrument ON instrument_association_proposal(instrument_id)
- WHERE status NOT IN ('ACCEPTED', 'REJECTED', 'SUPERSEDED');
-
--- Add observation tracking columns to existing sequencer_instrument table
-ALTER TABLE public.sequencer_instrument
- ADD COLUMN source VARCHAR(30) DEFAULT 'CURATOR'
- CHECK (source IN ('CURATOR', 'CONSENSUS', 'PUBLICATION')),
- ADD COLUMN observation_count INTEGER DEFAULT 0,
- ADD COLUMN confidence_score DOUBLE PRECISION DEFAULT 1.0,
- ADD COLUMN last_observed_at TIMESTAMP;
-
-CREATE INDEX idx_si_confidence ON public.sequencer_instrument(confidence_score DESC);
-
-# --- !Downs
-
-DROP INDEX IF EXISTS idx_si_confidence;
-
-ALTER TABLE public.sequencer_instrument
- DROP COLUMN IF EXISTS source,
- DROP COLUMN IF EXISTS observation_count,
- DROP COLUMN IF EXISTS confidence_score,
- DROP COLUMN IF EXISTS last_observed_at;
-
-DROP TABLE IF EXISTS public.instrument_association_proposal;
diff --git a/conf/evolutions/default/69.sql b/conf/evolutions/default/69.sql
deleted file mode 100644
index ff55a69e..00000000
--- a/conf/evolutions/default/69.sql
+++ /dev/null
@@ -1,99 +0,0 @@
-# --- !Ups
-
--- Vendor-specific targeted test types
-INSERT INTO test_type_definition (
- code, display_name, category, vendor, target_type,
- expected_min_depth, expected_target_depth,
- supports_haplogroup_y, supports_haplogroup_mt, supports_autosomal_ibd, supports_ancestry,
- typical_file_formats, description
-) VALUES
-('BIG_Y_700', 'FTDNA Big Y-700', 'SEQUENCING', 'FamilyTreeDNA', 'Y_CHROMOSOME',
- 30.0, 50.0,
- TRUE, FALSE, FALSE, FALSE,
- ARRAY['BAM', 'VCF', 'BED'], 'FamilyTreeDNA Big Y-700 targeted Y-chromosome sequencing covering ~700 STRs and ~200K Y-SNPs.'),
-
-('Y_ELITE', 'Full Genomes Y Elite', 'SEQUENCING', 'Full Genomes', 'Y_CHROMOSOME',
- 15.0, 30.0,
- TRUE, FALSE, FALSE, FALSE,
- ARRAY['BAM', 'CRAM', 'VCF'], 'Full Genomes Y Elite whole Y-chromosome sequencing at 30x depth.'),
-
-('Y_PRIME', 'YSEQ Y-Prime', 'SEQUENCING', 'YSEQ', 'Y_CHROMOSOME',
- 15.0, 30.0,
- TRUE, FALSE, FALSE, FALSE,
- ARRAY['BAM', 'VCF'], 'YSEQ Y-Prime Y-chromosome sequencing product.'),
-
-('MT_FULL_SEQUENCE', 'mtDNA Full Sequence', 'SEQUENCING', 'FamilyTreeDNA', 'MT_DNA',
- 500.0, 1000.0,
- FALSE, TRUE, FALSE, FALSE,
- ARRAY['BAM', 'FASTA', 'VCF'], 'Full mitochondrial genome sequencing (16,569 bp).'),
-
-('MT_PLUS', 'FTDNA mtDNA Plus', 'SEQUENCING', 'FamilyTreeDNA', 'MT_DNA',
- 200.0, 500.0,
- FALSE, TRUE, FALSE, FALSE,
- ARRAY['BAM', 'FASTA', 'VCF'], 'FamilyTreeDNA mtDNA Plus covering full mitochondrial genome.'),
-
-('BIG_Y_500', 'FTDNA Big Y-500 (Legacy)', 'SEQUENCING', 'FamilyTreeDNA', 'Y_CHROMOSOME',
- 20.0, 40.0,
- TRUE, FALSE, FALSE, FALSE,
- ARRAY['BAM', 'VCF', 'BED'], 'Legacy FamilyTreeDNA Big Y-500 product (superseded by Big Y-700).');
-
--- Link Big Y-500 successor to Big Y-700
-UPDATE test_type_definition
-SET successor_test_type_id = (SELECT id FROM test_type_definition WHERE code = 'BIG_Y_700')
-WHERE code = 'BIG_Y_500';
-
--- Target region definitions for targeted tests
-CREATE TABLE public.test_type_target_region (
- id SERIAL PRIMARY KEY,
- test_type_id INTEGER NOT NULL REFERENCES test_type_definition(id),
- contig_name VARCHAR(50) NOT NULL,
- start_position INTEGER,
- end_position INTEGER,
- region_name VARCHAR(100) NOT NULL,
- region_type VARCHAR(50) NOT NULL
- CHECK (region_type IN ('FULL', 'PARTIAL', 'TARGETED_SNPS')),
- expected_coverage_pct DOUBLE PRECISION,
- expected_min_depth DOUBLE PRECISION,
- UNIQUE(test_type_id, contig_name, start_position, end_position)
-);
-
-CREATE INDEX idx_tttr_test_type ON test_type_target_region(test_type_id);
-
--- Seed target regions for targeted tests
--- Big Y-700: Y chromosome combbed region
-INSERT INTO test_type_target_region (test_type_id, contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth)
-SELECT id, 'chrY', 2781480, 56887903, 'Y Combbed Region', 'TARGETED_SNPS', 0.95, 30.0
-FROM test_type_definition WHERE code = 'BIG_Y_700';
-
--- Big Y-500: Y chromosome combbed region (narrower)
-INSERT INTO test_type_target_region (test_type_id, contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth)
-SELECT id, 'chrY', 2781480, 56887903, 'Y Combbed Region', 'TARGETED_SNPS', 0.90, 20.0
-FROM test_type_definition WHERE code = 'BIG_Y_500';
-
--- Y Elite: Full Y chromosome
-INSERT INTO test_type_target_region (test_type_id, contig_name, region_name, region_type, expected_coverage_pct, expected_min_depth)
-SELECT id, 'chrY', 'Full Y Chromosome', 'FULL', 0.98, 15.0
-FROM test_type_definition WHERE code = 'Y_ELITE';
-
--- Y Prime: Full Y chromosome
-INSERT INTO test_type_target_region (test_type_id, contig_name, region_name, region_type, expected_coverage_pct, expected_min_depth)
-SELECT id, 'chrY', 'Full Y Chromosome', 'FULL', 0.95, 15.0
-FROM test_type_definition WHERE code = 'Y_PRIME';
-
--- MT Full Sequence: Full mitochondrial genome
-INSERT INTO test_type_target_region (test_type_id, contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth)
-SELECT id, 'chrM', 1, 16569, 'Full Mitochondrial Genome', 'FULL', 0.999, 500.0
-FROM test_type_definition WHERE code = 'MT_FULL_SEQUENCE';
-
--- MT Plus: Full mitochondrial genome
-INSERT INTO test_type_target_region (test_type_id, contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth)
-SELECT id, 'chrM', 1, 16569, 'Full Mitochondrial Genome', 'FULL', 0.999, 200.0
-FROM test_type_definition WHERE code = 'MT_PLUS';
-
-# --- !Downs
-
-DROP TABLE IF EXISTS public.test_type_target_region;
-
-DELETE FROM test_type_definition WHERE code IN (
- 'BIG_Y_700', 'Y_ELITE', 'Y_PRIME', 'MT_FULL_SEQUENCE', 'MT_PLUS', 'BIG_Y_500'
-);
diff --git a/conf/evolutions/default/7.sql b/conf/evolutions/default/7.sql
deleted file mode 100644
index 5f70dea7..00000000
--- a/conf/evolutions/default/7.sql
+++ /dev/null
@@ -1,306 +0,0 @@
-# --- !Ups
---- New tables to enable Pan Genome instead of traditional linear references
-
--- -----------------------------------------------------------
--- 1. New Tables: Pangenome Core & Reference Data
--- -----------------------------------------------------------
-
--- Table: public.pangenome_graph
--- Defines unique versions or builds of the pangenome graph itself.
-CREATE TABLE public.pangenome_graph
-(
- id BIGSERIAL PRIMARY KEY,
- name VARCHAR(255) NOT NULL UNIQUE,
- description TEXT,
- creation_date TIMESTAMP NOT NULL DEFAULT NOW(),
- checksum VARCHAR(255)
-);
-
--- Table: public.assembly_metadata
--- Stores metadata about the source assemblies (e.g., GRCh37, GRCh38, CHM13v2.0)
-CREATE TABLE public.assembly_metadata
-(
- id BIGSERIAL PRIMARY KEY,
- assembly_name VARCHAR(255) NOT NULL UNIQUE,
- accession VARCHAR(255),
- release_date DATE,
- source_organism VARCHAR(255),
- assembly_level VARCHAR(50),
- metadata JSONB
-);
-
--- Table: public.pangenome_node
--- Represents the atomic, shared DNA segments that are the building blocks of the pangenome graph.
-CREATE TABLE public.pangenome_node
-(
- id BIGSERIAL PRIMARY KEY,
- graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- sequence TEXT NOT NULL,
- length INTEGER NOT NULL,
- is_core BOOLEAN,
- annotation_id INTEGER
-);
-
--- Table: public.pangenome_edge
--- Defines the connections or adjacencies between pangenome_node's.
-CREATE TABLE public.pangenome_edge
-(
- id BIGSERIAL PRIMARY KEY,
- graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- source_node_id INTEGER NOT NULL REFERENCES public.pangenome_node (id),
- target_node_id INTEGER NOT NULL REFERENCES public.pangenome_node (id),
- source_orientation VARCHAR(1) NOT NULL CHECK (source_orientation IN ('+', '-')),
- target_orientation VARCHAR(1) NOT NULL CHECK (target_orientation IN ('+', '-')),
- type VARCHAR(50),
- UNIQUE (graph_id, source_node_id, target_node_id, source_orientation, target_orientation)
-);
-
--- Table: public.pangenome_path
--- Represents specific linear sequences (like GRCh38 chr1, Y-DNA reference) as ordered traversals through pangenome_nodes.
-CREATE TABLE public.pangenome_path
-(
- id BIGSERIAL PRIMARY KEY,
- graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- name VARCHAR(255) NOT NULL,
- node_sequence INTEGER[] NOT NULL,
- length BIGINT NOT NULL,
- source_assembly_id INTEGER REFERENCES public.assembly_metadata (id),
- UNIQUE (graph_id, name)
-);
-
--- Table: public.gene_annotation (Optional, if not existing or needs separate table)
--- Stores metadata about genes.
-CREATE TABLE public.gene_annotation
-(
- id BIGSERIAL PRIMARY KEY,
- gene_symbol VARCHAR(255),
- gene_id VARCHAR(255),
- description TEXT,
- representative_sequence_node_id INTEGER REFERENCES public.pangenome_node (id)
-);
-
--- Add the foreign key to pangenome_node now that gene_annotation exists
-ALTER TABLE public.pangenome_node
- ADD CONSTRAINT fk_pangenome_node_annotation FOREIGN KEY (annotation_id) REFERENCES public.gene_annotation (id);
-
--- -----------------------------------------------------------
--- 2. New Tables: Variant Representation & Linkage
--- -----------------------------------------------------------
-
--- Table: public.canonical_pangenome_variant
--- Represents a unique, abstract variant (SNP, INDEL, Structural Variant) as defined within a specific pangenome graph.
-CREATE TABLE public.canonical_pangenome_variant
-(
- id BIGSERIAL PRIMARY KEY,
- pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- variant_type VARCHAR(50) NOT NULL,
- variant_nodes INTEGER[] NOT NULL,
- variant_edges INTEGER[] NOT NULL DEFAULT '{}',
- reference_path_id INTEGER REFERENCES public.pangenome_path (id),
- reference_start_position INTEGER,
- reference_end_position INTEGER,
- reference_allele_sequence TEXT,
- alternate_allele_sequence TEXT,
- canonical_hash VARCHAR(255) NOT NULL UNIQUE,
- description TEXT,
- creation_date TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Table: public.pangenome_variant_link
--- Bridges existing public.variant (legacy marker) to its canonical pangenome representation.
-CREATE TABLE public.pangenome_variant_link
-(
- pangenome_variant_link_id BIGSERIAL PRIMARY KEY,
- variant_id INTEGER NOT NULL REFERENCES public.variant (variant_id) ON DELETE CASCADE,
- canonical_pangenome_variant_id INTEGER NOT NULL REFERENCES public.canonical_pangenome_variant (id) ON DELETE CASCADE,
- pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- description TEXT,
- mapping_source VARCHAR(255) NOT NULL,
- mapping_date TIMESTAMP NOT NULL DEFAULT NOW(),
- UNIQUE (variant_id, canonical_pangenome_variant_id)
-);
-
--- -----------------------------------------------------------
--- 3. New Table: Sample-Specific Variant Calls
--- -----------------------------------------------------------
-
--- Table: public.reported_variant_pangenome
--- Stores the detailed variant calls detected for each sample_guid against a pangenome graph.
-CREATE TABLE public.reported_variant_pangenome
-(
- id BIGSERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL,
- graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- variant_type VARCHAR(50) NOT NULL CHECK (variant_type IN (
- 'SNP', 'INDEL', 'SV_INSERTION',
- 'SV_DELETION', 'SV_INVERSION',
- 'SV_DUPLICATION', 'SV_TRANSLOCATION',
- 'PAV_GENE', 'STR', 'CNV', 'COMPLEX'
- )),
- reference_path_id INTEGER REFERENCES public.pangenome_path (id),
- reference_start_position INTEGER,
- reference_end_position INTEGER,
- variant_nodes INTEGER[] NOT NULL,
- variant_edges INTEGER[] NOT NULL DEFAULT '{}',
- alternate_allele_sequence TEXT,
- reference_allele_sequence TEXT,
- reference_repeat_count INTEGER,
- alternate_repeat_count INTEGER,
- allele_fraction DOUBLE PRECISION,
- depth INTEGER,
- reported_date TIMESTAMP NOT NULL DEFAULT NOW(),
- provenance VARCHAR(255) NOT NULL,
- confidence_score DOUBLE PRECISION NOT NULL,
- notes TEXT,
- status VARCHAR(255) NOT NULL,
- zygosity VARCHAR(10) CHECK (zygosity IN ('HOM_REF', 'HET', 'HOM_ALT', 'UNKNOWN')),
- haplotype_information JSONB
-);
-
--- IBD Discovery and Consensus
-
--- -----------------------------------------------------------
--- 4. New Table: public.validation_service
--- -----------------------------------------------------------
-CREATE TABLE public.validation_service
-(
- id BIGSERIAL PRIMARY KEY,
- guid UUID NOT NULL UNIQUE,
- name VARCHAR(255) NOT NULL UNIQUE,
- description TEXT,
- trust_level VARCHAR(50)
-);
-
--- -----------------------------------------------------------
--- 5. New Table: public.ibd_discovery_index
--- A central, privacy-preserving index for IBD matches.
--- This table represents the *match event itself*.
--- -----------------------------------------------------------
-CREATE TABLE public.ibd_discovery_index
-(
- id BIGSERIAL PRIMARY KEY,
- sample_guid_1 UUID NOT NULL,
- sample_guid_2 UUID NOT NULL,
- pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- match_region_type VARCHAR(50) NOT NULL CHECK (match_region_type IN
- ('AUTOSOMAL', 'X_CHROMOSOME', 'Y_CHROMOSOME', 'MT_DNA',
- 'ALL_CHROMOSOMES')),
- total_shared_cm_approx DOUBLE PRECISION,
- num_shared_segments_approx INTEGER,
- is_publicly_discoverable BOOLEAN NOT NULL DEFAULT FALSE,
- consensus_status VARCHAR(50) NOT NULL DEFAULT 'INITIATED',
- last_consensus_update TIMESTAMP NOT NULL DEFAULT NOW(),
- validation_service_guid UUID REFERENCES public.validation_service (guid),
- validation_timestamp TIMESTAMP,
- indexed_by_service VARCHAR(255),
- indexed_date TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
--- Unique constraint for IBD pairs (order-independent)
-CREATE UNIQUE INDEX idx_unique_ibd_discovery_pair ON public.ibd_discovery_index (
- LEAST(sample_guid_1, sample_guid_2),
- GREATEST(sample_guid_1, sample_guid_2),
- pangenome_graph_id,
- match_region_type
- );
-
--- Indexes for ibd_discovery_index
-CREATE INDEX idx_ibd_discovery_sample1 ON public.ibd_discovery_index (sample_guid_1);
-CREATE INDEX idx_ibd_discovery_sample2 ON public.ibd_discovery_index (sample_guid_2);
-CREATE INDEX idx_ibd_discovery_graph_id ON public.ibd_discovery_index (pangenome_graph_id);
-CREATE INDEX idx_ibd_discovery_region_type ON public.ibd_discovery_index (match_region_type);
-CREATE INDEX idx_ibd_discovery_cm_approx ON public.ibd_discovery_index (total_shared_cm_approx);
-CREATE INDEX idx_ibd_discovery_public_status ON public.ibd_discovery_index (is_publicly_discoverable);
-CREATE INDEX idx_ibd_discovery_consensus_status ON public.ibd_discovery_index (consensus_status);
-
-
--- -----------------------------------------------------------
--- 6. New Table: public.ibd_pds_attestation
--- Records a specific PDS's attestation or validation for an IBD match.
--- -----------------------------------------------------------
-CREATE TABLE public.ibd_pds_attestation
-(
- id BIGSERIAL PRIMARY KEY,
- ibd_discovery_index_id BIGINT NOT NULL REFERENCES public.ibd_discovery_index (id) ON DELETE CASCADE,
- attesting_pds_guid UUID NOT NULL,
- attesting_sample_guid UUID NOT NULL,
- attestation_timestamp TIMESTAMP NOT NULL DEFAULT NOW(),
- attestation_signature TEXT NOT NULL,
-
- match_summary_hash VARCHAR(255) NOT NULL,
- attestation_type VARCHAR(50) NOT NULL CHECK (attestation_type IN
- ('INITIAL_REPORT', 'CONFIRMATION', 'DISPUTE', 'REVOCATION',
- 'THIRD_PARTY_VALIDATION')),
- attestation_notes TEXT,
- UNIQUE (ibd_discovery_index_id, attesting_pds_guid, attestation_type)
-);
-
--- Indexes for ibd_pds_attestation
-CREATE INDEX idx_ibd_attestation_index_id ON public.ibd_pds_attestation (ibd_discovery_index_id);
-CREATE INDEX idx_ibd_attestation_pds_guid ON public.ibd_pds_attestation (attesting_pds_guid);
-CREATE INDEX idx_ibd_attestation_type ON public.ibd_pds_attestation (attestation_type);
-
-
--- -----------------------------------------------------------
--- 6. Revisions to Existing Genetic Genealogy Tables
--- -----------------------------------------------------------
-
--- Table: public.genbank_contig
--- Add columns to link existing contigs to their pangenome context.
-ALTER TABLE public.genbank_contig
- ADD COLUMN pangenome_path_id INTEGER REFERENCES public.pangenome_path (id);
-
--- -----------------------------------------------------------
--- 7. Indexes for Performance
--- -----------------------------------------------------------
-
--- Indexes for public.pangenome_node
-CREATE INDEX idx_pangenome_node_graph_id ON public.pangenome_node (graph_id);
-
--- Indexes for public.pangenome_path
-CREATE INDEX idx_pangenome_path_graph_id ON public.pangenome_path (graph_id);
-CREATE INDEX idx_pangenome_path_assembly_id ON public.pangenome_path (source_assembly_id);
-
--- Indexes for public.canonical_pangenome_variant
-CREATE INDEX idx_cpv_graph_id ON public.canonical_pangenome_variant (pangenome_graph_id);
-CREATE INDEX idx_cpv_variant_type ON public.canonical_pangenome_variant (variant_type);
-CREATE INDEX idx_cpv_ref_path_pos ON public.canonical_pangenome_variant (reference_path_id, reference_start_position);
-CREATE INDEX idx_cpv_variant_nodes ON public.canonical_pangenome_variant USING GIN (variant_nodes);
-CREATE INDEX idx_cpv_variant_edges ON public.canonical_pangenome_variant USING GIN (variant_edges);
-
--- Indexes for public.pangenome_variant_link
-CREATE INDEX idx_pvlink_variant_id ON public.pangenome_variant_link (variant_id);
-CREATE INDEX idx_pvlink_canonical_id ON public.pangenome_variant_link (canonical_pangenome_variant_id);
-CREATE INDEX idx_pvlink_graph_id ON public.pangenome_variant_link (pangenome_graph_id);
-
--- Indexes for public.reported_variant_pangenome
-CREATE INDEX idx_rvp_sample_guid ON public.reported_variant_pangenome (sample_guid);
-CREATE INDEX idx_rvp_graph_id ON public.reported_variant_pangenome (graph_id);
-CREATE INDEX idx_rvp_variant_type ON public.reported_variant_pangenome (variant_type);
-CREATE INDEX idx_rvp_ref_path_pos ON public.reported_variant_pangenome (reference_path_id, reference_start_position);
-CREATE INDEX idx_rvp_variant_nodes ON public.reported_variant_pangenome USING GIN (variant_nodes);
-CREATE INDEX idx_rvp_variant_edges ON public.reported_variant_pangenome USING GIN (variant_edges);
-CREATE INDEX idx_rvp_confidence_score ON public.reported_variant_pangenome (confidence_score);
-
-
--- This is destructive, but the tables were never actually used in the application code
-DROP TABLE reported_variant;
-DROP TABLE reported_negative_variant;
-
-# --- !Downs
-ALTER TABLE public.genbank_contig
- DROP COLUMN pangenome_path_id;
-
-DROP TABLE public.ibd_pds_attestation;
-DROP TABLE public.ibd_discovery_index;
-DROP TABLE public.validation_service;
-DROP TABLE public.reported_variant_pangenome;
-DROP TABLE public.pangenome_variant_link;
-DROP TABLE public.canonical_pangenome_variant;
-DROP TABLE public.pangenome_node;
-DROP TABLE public.gene_annotation;
-DROP TABLE public.pangenome_path;
-DROP TABLE public.pangenome_edge;
-DROP TABLE public.pangenome_node;
-DROP TABLE public.assembly_metadata;
-DROP TABLE public.pangenome_graph;
\ No newline at end of file
diff --git a/conf/evolutions/default/70.sql b/conf/evolutions/default/70.sql
deleted file mode 100644
index f76ba3cb..00000000
--- a/conf/evolutions/default/70.sql
+++ /dev/null
@@ -1,50 +0,0 @@
-# --- !Ups
-
--- Additional vendor-specific chip test types
-INSERT INTO test_type_definition (
- code, display_name, category, vendor, target_type,
- expected_marker_count,
- supports_haplogroup_y, supports_haplogroup_mt, supports_autosomal_ibd, supports_ancestry,
- typical_file_formats, description
-) VALUES
-('ARRAY_23ANDME_V4', '23andMe v4 Chip', 'GENOTYPING', '23andMe', 'MIXED',
- 570000,
- TRUE, TRUE, TRUE, TRUE,
- ARRAY['TXT', 'CSV'], '23andMe v4 chip (~570K markers). Superseded by v5.'),
-
-('ARRAY_ANCESTRY_V1', 'AncestryDNA v1', 'GENOTYPING', 'AncestryDNA', 'MIXED',
- 700000,
- TRUE, TRUE, TRUE, TRUE,
- ARRAY['TXT', 'CSV'], 'AncestryDNA v1 chip. Superseded by v2.'),
-
-('ARRAY_MYHERITAGE', 'MyHeritage DNA', 'GENOTYPING', 'MyHeritage', 'MIXED',
- 700000,
- TRUE, TRUE, TRUE, TRUE,
- ARRAY['CSV'], 'MyHeritage DNA chip (~700K markers).'),
-
-('ARRAY_LIVINGDNA', 'LivingDNA', 'GENOTYPING', 'LivingDNA', 'MIXED',
- 630000,
- TRUE, TRUE, TRUE, TRUE,
- ARRAY['CSV', 'TXT'], 'LivingDNA chip (~630K markers).'),
-
-('ARRAY_CUSTOM', 'Custom SNP Array', 'GENOTYPING', NULL, 'MIXED',
- NULL,
- TRUE, TRUE, FALSE, FALSE,
- ARRAY['TXT', 'CSV', 'VCF'], 'Custom or unrecognized SNP array data.');
-
--- Link deprecated versions to successors
-UPDATE test_type_definition
-SET successor_test_type_id = (SELECT id FROM test_type_definition WHERE code = 'SNP_ARRAY_23ANDME'),
- deprecated_at = '2017-08-01'
-WHERE code = 'ARRAY_23ANDME_V4';
-
-UPDATE test_type_definition
-SET successor_test_type_id = (SELECT id FROM test_type_definition WHERE code = 'SNP_ARRAY_ANCESTRY'),
- deprecated_at = '2019-01-01'
-WHERE code = 'ARRAY_ANCESTRY_V1';
-
-# --- !Downs
-
-DELETE FROM test_type_definition WHERE code IN (
- 'ARRAY_23ANDME_V4', 'ARRAY_ANCESTRY_V1', 'ARRAY_MYHERITAGE', 'ARRAY_LIVINGDNA', 'ARRAY_CUSTOM'
-);
diff --git a/conf/evolutions/default/71.sql b/conf/evolutions/default/71.sql
deleted file mode 100644
index c806bf1e..00000000
--- a/conf/evolutions/default/71.sql
+++ /dev/null
@@ -1,71 +0,0 @@
-# --- !Ups
-
--- Per-test-type coverage expectation profiles for variant calling confidence
-CREATE TABLE public.coverage_expectation_profile (
- id SERIAL PRIMARY KEY,
- test_type_id INTEGER NOT NULL REFERENCES public.test_type_definition(id),
- contig_name VARCHAR(50) NOT NULL,
- variant_class VARCHAR(50) NOT NULL DEFAULT 'SNP', -- SNP, STR, INDEL
- min_depth_high DOUBLE PRECISION NOT NULL, -- minimum depth for HIGH confidence
- min_depth_medium DOUBLE PRECISION NOT NULL, -- minimum depth for MEDIUM confidence
- min_depth_low DOUBLE PRECISION NOT NULL, -- minimum depth for LOW confidence
- min_coverage_pct DOUBLE PRECISION, -- minimum % bases covered at 1x
- min_mapping_quality DOUBLE PRECISION, -- minimum mean mapping quality
- min_callable_pct DOUBLE PRECISION, -- minimum % callable bases
- notes TEXT,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- UNIQUE (test_type_id, contig_name, variant_class)
-);
-
-CREATE INDEX idx_cep_test_type ON public.coverage_expectation_profile (test_type_id);
-
--- Seed profiles for WGS
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes)
-SELECT id, 'Y', 'SNP', 20.0, 10.0, 5.0, 0.95, 30.0, 0.90, 'WGS Y-chromosome SNP calling thresholds'
-FROM test_type_definition WHERE code = 'WGS';
-
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes)
-SELECT id, 'MT', 'SNP', 100.0, 50.0, 20.0, 0.99, 30.0, 0.95, 'WGS mtDNA SNP calling thresholds (high copy number)'
-FROM test_type_definition WHERE code = 'WGS';
-
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, notes)
-SELECT id, 'Y', 'STR', 30.0, 15.0, 8.0, 0.90, 20.0, 'WGS Y-chromosome STR calling thresholds (higher depth needed)'
-FROM test_type_definition WHERE code = 'WGS';
-
--- Seed profiles for BIG_Y_700
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes)
-SELECT id, 'Y', 'SNP', 50.0, 25.0, 10.0, 0.90, 25.0, 0.85, 'BIG-Y 700 Y-chromosome SNP calling thresholds'
-FROM test_type_definition WHERE code = 'BIG_Y_700';
-
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, notes)
-SELECT id, 'Y', 'STR', 60.0, 30.0, 15.0, 0.85, 20.0, 'BIG-Y 700 Y-chromosome STR calling thresholds'
-FROM test_type_definition WHERE code = 'BIG_Y_700';
-
--- Seed profiles for BIG_Y_500
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes)
-SELECT id, 'Y', 'SNP', 40.0, 20.0, 8.0, 0.85, 25.0, 0.80, 'BIG-Y 500 Y-chromosome SNP calling thresholds'
-FROM test_type_definition WHERE code = 'BIG_Y_500';
-
--- Seed profiles for Y_ELITE
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes)
-SELECT id, 'Y', 'SNP', 60.0, 30.0, 12.0, 0.92, 25.0, 0.88, 'Y-Elite Y-chromosome SNP calling thresholds'
-FROM test_type_definition WHERE code = 'Y_ELITE';
-
--- Seed profiles for MT_FULL_SEQUENCE
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes)
-SELECT id, 'MT', 'SNP', 200.0, 100.0, 30.0, 0.99, 30.0, 0.98, 'Full mtDNA sequence SNP calling thresholds'
-FROM test_type_definition WHERE code = 'MT_FULL_SEQUENCE';
-
--- Seed profiles for SNP arrays (chip data — marker-based, not depth-based)
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, notes)
-SELECT id, 'Y', 'SNP', 0.0, 0.0, 0.0, 0.0, 'Chip-based: confidence from marker count, not depth'
-FROM test_type_definition WHERE code = 'SNP_ARRAY_23ANDME';
-
-INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, notes)
-SELECT id, 'Y', 'SNP', 0.0, 0.0, 0.0, 0.0, 'Chip-based: confidence from marker count, not depth'
-FROM test_type_definition WHERE code = 'SNP_ARRAY_ANCESTRY';
-
-# --- !Downs
-
-DROP TABLE IF EXISTS public.coverage_expectation_profile;
diff --git a/conf/evolutions/default/72.sql b/conf/evolutions/default/72.sql
deleted file mode 100644
index 5f858b63..00000000
--- a/conf/evolutions/default/72.sql
+++ /dev/null
@@ -1,60 +0,0 @@
-# --- !Ups
-
--- Group Project definition
-CREATE TABLE public.group_project (
- id SERIAL PRIMARY KEY,
- project_guid UUID NOT NULL UNIQUE DEFAULT gen_random_uuid(),
- project_name VARCHAR(100) NOT NULL,
- project_type VARCHAR(30) NOT NULL CHECK (project_type IN ('HAPLOGROUP', 'SURNAME', 'GEOGRAPHIC', 'ETHNIC', 'RESEARCH', 'CUSTOM')),
- target_haplogroup VARCHAR(100),
- target_lineage VARCHAR(10) CHECK (target_lineage IN ('Y_DNA', 'MT_DNA', 'BOTH')),
- description TEXT,
- background_info TEXT,
- join_policy VARCHAR(30) NOT NULL DEFAULT 'APPROVAL_REQUIRED' CHECK (join_policy IN ('OPEN', 'APPROVAL_REQUIRED', 'INVITE_ONLY', 'HAPLOGROUP_VERIFIED')),
- haplogroup_requirement VARCHAR(255),
- member_list_visibility VARCHAR(20) NOT NULL DEFAULT 'MEMBERS_ONLY' CHECK (member_list_visibility IN ('PUBLIC', 'MEMBERS_ONLY', 'ADMINS_ONLY', 'HIDDEN')),
- str_policy VARCHAR(20) NOT NULL DEFAULT 'DISTANCE_ONLY' CHECK (str_policy IN ('HIDDEN', 'DISTANCE_ONLY', 'MODAL_COMPARISON', 'MEMBERS_ONLY_RAW', 'PUBLIC_RAW')),
- snp_policy VARCHAR(30) NOT NULL DEFAULT 'TERMINAL_ONLY' CHECK (snp_policy IN ('HIDDEN', 'TERMINAL_ONLY', 'FULL_PATH', 'WITH_PRIVATE_VARIANTS')),
- public_tree_view BOOLEAN NOT NULL DEFAULT FALSE,
- succession_policy VARCHAR(30) DEFAULT 'CO_ADMIN_INHERITS' CHECK (succession_policy IN ('CO_ADMIN_INHERITS', 'MEMBER_VOTE', 'DECODINGUS_APPOINTS', 'PROJECT_CLOSES')),
- owner_did VARCHAR(255) NOT NULL,
- at_uri VARCHAR(512),
- at_cid VARCHAR(255),
- deleted BOOLEAN NOT NULL DEFAULT FALSE,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX idx_group_project_owner ON public.group_project (owner_did);
-CREATE INDEX idx_group_project_type ON public.group_project (project_type);
-CREATE INDEX idx_group_project_haplogroup ON public.group_project (target_haplogroup) WHERE target_haplogroup IS NOT NULL;
-
--- Group Project membership
-CREATE TABLE public.group_project_member (
- id SERIAL PRIMARY KEY,
- group_project_id INTEGER NOT NULL REFERENCES public.group_project(id),
- citizen_did VARCHAR(255) NOT NULL,
- biosample_at_uri VARCHAR(512),
- role VARCHAR(20) NOT NULL DEFAULT 'MEMBER' CHECK (role IN ('ADMIN', 'CO_ADMIN', 'MODERATOR', 'CURATOR', 'MEMBER')),
- status VARCHAR(20) NOT NULL DEFAULT 'PENDING_APPROVAL' CHECK (status IN ('PENDING_APPROVAL', 'ACTIVE', 'SUSPENDED', 'LEFT', 'REMOVED')),
- display_name VARCHAR(50),
- kit_id VARCHAR(50),
- visibility JSONB NOT NULL DEFAULT '{}',
- subgroup_ids TEXT[] NOT NULL DEFAULT '{}',
- contribution_level VARCHAR(20) DEFAULT 'OBSERVER' CHECK (contribution_level IN ('OBSERVER', 'CONTRIBUTOR', 'ACTIVE_RESEARCHER')),
- joined_at TIMESTAMP,
- at_uri VARCHAR(512),
- at_cid VARCHAR(255),
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- UNIQUE (group_project_id, citizen_did)
-);
-
-CREATE INDEX idx_gpm_project ON public.group_project_member (group_project_id);
-CREATE INDEX idx_gpm_citizen ON public.group_project_member (citizen_did);
-CREATE INDEX idx_gpm_status ON public.group_project_member (status);
-
-# --- !Downs
-
-DROP TABLE IF EXISTS public.group_project_member;
-DROP TABLE IF EXISTS public.group_project;
diff --git a/conf/evolutions/default/73.sql b/conf/evolutions/default/73.sql
deleted file mode 100644
index ba388cb5..00000000
--- a/conf/evolutions/default/73.sql
+++ /dev/null
@@ -1,30 +0,0 @@
-# --- !Ups
-
-CREATE SCHEMA IF NOT EXISTS billing;
-
-CREATE TABLE billing.patron_subscription (
- id SERIAL PRIMARY KEY,
- user_id UUID NOT NULL,
- patron_tier VARCHAR(30) NOT NULL CHECK (patron_tier IN ('SUPPORTER', 'CONTRIBUTOR', 'SUSTAINER', 'FOUNDING_PATRON')),
- status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE' CHECK (status IN ('ACTIVE', 'CANCELLED', 'PAST_DUE', 'EXPIRED')),
- payment_provider VARCHAR(20) NOT NULL CHECK (payment_provider IN ('STRIPE', 'PAYPAL')),
- provider_subscription_id VARCHAR(255),
- provider_customer_id VARCHAR(255),
- amount_cents INTEGER NOT NULL,
- currency VARCHAR(3) NOT NULL DEFAULT 'USD',
- billing_interval VARCHAR(10) NOT NULL CHECK (billing_interval IN ('MONTHLY', 'YEARLY')),
- current_period_start TIMESTAMPTZ,
- current_period_end TIMESTAMPTZ,
- cancelled_at TIMESTAMPTZ,
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX idx_patron_sub_user ON billing.patron_subscription (user_id);
-CREATE INDEX idx_patron_sub_status ON billing.patron_subscription (status);
-CREATE INDEX idx_patron_sub_provider ON billing.patron_subscription (payment_provider, provider_subscription_id);
-
-# --- !Downs
-
-DROP TABLE IF EXISTS billing.patron_subscription;
-DROP SCHEMA IF EXISTS billing;
diff --git a/conf/evolutions/default/74.sql b/conf/evolutions/default/74.sql
deleted file mode 100644
index b32d4547..00000000
--- a/conf/evolutions/default/74.sql
+++ /dev/null
@@ -1,106 +0,0 @@
-# --- !Ups
-
--- Match Discovery Engine tables (IBD-AV-1)
-
-CREATE TABLE match_suggestion (
- id BIGSERIAL PRIMARY KEY,
- target_sample_guid UUID NOT NULL,
- suggested_sample_guid UUID NOT NULL,
- suggestion_type VARCHAR(30) NOT NULL CHECK (suggestion_type IN ('SHARED_MATCH', 'POPULATION_OVERLAP', 'HAPLOGROUP')),
- score DOUBLE PRECISION NOT NULL,
- metadata JSONB,
- status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE' CHECK (status IN ('ACTIVE', 'DISMISSED', 'EXPIRED', 'CONVERTED')),
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- expires_at TIMESTAMPTZ,
- UNIQUE (target_sample_guid, suggested_sample_guid, suggestion_type)
-);
-
-CREATE INDEX idx_match_suggestion_target ON match_suggestion(target_sample_guid, status);
-CREATE INDEX idx_match_suggestion_expires ON match_suggestion(expires_at) WHERE status = 'ACTIVE';
-
-CREATE TABLE population_breakdown_cache (
- id BIGSERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL UNIQUE,
- breakdown JSONB NOT NULL,
- breakdown_hash VARCHAR(64) NOT NULL,
- cached_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- source_at_uri VARCHAR(500)
-);
-
-CREATE INDEX idx_pop_cache_sample ON population_breakdown_cache(sample_guid);
-
-CREATE TABLE population_overlap_score (
- id BIGSERIAL PRIMARY KEY,
- sample_guid_1 UUID NOT NULL,
- sample_guid_2 UUID NOT NULL,
- overlap_score DOUBLE PRECISION NOT NULL,
- computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- UNIQUE (sample_guid_1, sample_guid_2),
- CHECK (sample_guid_1 < sample_guid_2)
-);
-
-CREATE INDEX idx_pop_overlap_sample1 ON population_overlap_score(sample_guid_1);
-CREATE INDEX idx_pop_overlap_sample2 ON population_overlap_score(sample_guid_2);
-
--- Match Request & Consent Tracking tables (IBD-AV-2)
-
-CREATE TABLE match_request_tracking (
- id BIGSERIAL PRIMARY KEY,
- at_uri VARCHAR(500) NOT NULL UNIQUE,
- requester_did VARCHAR(255) NOT NULL,
- target_did VARCHAR(255),
- from_sample_guid UUID NOT NULL,
- to_sample_guid UUID NOT NULL,
- request_type VARCHAR(30) NOT NULL DEFAULT 'FULL' CHECK (request_type IN ('AUTOSOMAL', 'Y_CHROMOSOME', 'MT_DNA', 'FULL')),
- status VARCHAR(20) NOT NULL DEFAULT 'PENDING' CHECK (status IN ('PENDING', 'ACCEPTED', 'DECLINED', 'EXPIRED', 'WITHDRAWN', 'CANCELLED')),
- discovery_reason JSONB,
- message TEXT,
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- expires_at TIMESTAMPTZ,
- completed_at TIMESTAMPTZ
-);
-
-CREATE INDEX idx_match_req_requester ON match_request_tracking(requester_did, status);
-CREATE INDEX idx_match_req_target ON match_request_tracking(target_did, status);
-CREATE INDEX idx_match_req_to_sample ON match_request_tracking(to_sample_guid, status);
-CREATE INDEX idx_match_req_from_sample ON match_request_tracking(from_sample_guid, status);
-
-CREATE TABLE match_consent_tracking (
- id BIGSERIAL PRIMARY KEY,
- at_uri VARCHAR(500) NOT NULL UNIQUE,
- consenting_did VARCHAR(255) NOT NULL,
- sample_guid UUID NOT NULL,
- consent_level VARCHAR(20) NOT NULL CHECK (consent_level IN ('FULL', 'ANONYMOUS', 'PROJECT_ONLY')),
- allowed_match_types JSONB,
- share_contact_info BOOLEAN NOT NULL DEFAULT FALSE,
- consented_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- expires_at TIMESTAMPTZ,
- revoked_at TIMESTAMPTZ
-);
-
-CREATE INDEX idx_match_consent_did ON match_consent_tracking(consenting_did);
-CREATE INDEX idx_match_consent_sample ON match_consent_tracking(sample_guid);
-
--- Extend ibd_discovery_index for IBD-AV-4 linkage
-ALTER TABLE ibd_discovery_index
- ADD COLUMN IF NOT EXISTS match_request_at_uri VARCHAR(500),
- ADD COLUMN IF NOT EXISTS requester_did VARCHAR(255),
- ADD COLUMN IF NOT EXISTS target_did VARCHAR(255);
-
-CREATE INDEX idx_ibd_request_uri ON ibd_discovery_index(match_request_at_uri);
-
-# --- !Downs
-
-DROP INDEX IF EXISTS idx_ibd_request_uri;
-
-ALTER TABLE ibd_discovery_index
- DROP COLUMN IF EXISTS match_request_at_uri,
- DROP COLUMN IF EXISTS requester_did,
- DROP COLUMN IF EXISTS target_did;
-
-DROP TABLE IF EXISTS match_consent_tracking;
-DROP TABLE IF EXISTS match_request_tracking;
-DROP TABLE IF EXISTS population_overlap_score;
-DROP TABLE IF EXISTS population_breakdown_cache;
-DROP TABLE IF EXISTS match_suggestion;
diff --git a/conf/evolutions/default/8.sql b/conf/evolutions/default/8.sql
deleted file mode 100644
index f81f54ae..00000000
--- a/conf/evolutions/default/8.sql
+++ /dev/null
@@ -1,59 +0,0 @@
-# --- !Ups
---- New tables to enable Pan Genome instead of traditional linear references
-
--- New Table: public.pangenome_alignment_metrics
--- Tracks quality metrics for alignment against a pangenome graph or its specific paths/nodes.
-CREATE TABLE public.pangenome_alignment_metrics
-(
- id BIGSERIAL PRIMARY KEY,
- sequence_file_id BIGINT NOT NULL REFERENCES public.sequence_file (id) ON DELETE CASCADE,
- pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- metric_level VARCHAR(50) NOT NULL CHECK (metric_level IN ('GRAPH_OVERALL', 'PATH', 'NODE', 'REGION')),
- pangenome_path_id INTEGER REFERENCES public.pangenome_path (id), -- Null if metric_level is GRAPH_OVERALL or NODE
- pangenome_node_id INTEGER REFERENCES public.pangenome_node (id), -- Null if metric_level is GRAPH_OVERALL or PATH/REGION
- region_start_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, start of the specific segment
- region_end_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, end of the specific segment
- region_name VARCHAR(255),
- region_length_bp BIGINT,
-
- -- Core Coverage Metrics
- mean_depth DOUBLE PRECISION,
- median_depth DOUBLE PRECISION,
- percent_coverage_at_1x DOUBLE PRECISION,
- percent_coverage_at_5x DOUBLE PRECISION,
- percent_coverage_at_10x DOUBLE PRECISION,
- percent_coverage_at_20x DOUBLE PRECISION,
- percent_coverage_at_30x DOUBLE PRECISION,
- bases_no_coverage BIGINT,
- bases_low_quality_mapping BIGINT,
- bases_callable BIGINT,
-
- -- Mapping Quality Metrics
- mean_mapping_quality DOUBLE PRECISION,
-
- -- Metadata and Provenance
- metrics_date TIMESTAMP NOT NULL DEFAULT NOW(),
- analysis_tool VARCHAR(255) NOT NULL,
- analysis_tool_version VARCHAR(50),
- notes TEXT,
- metadata JSONB
-);
-
--- Indexes for performance
-CREATE INDEX idx_pam_sequence_file_id ON public.pangenome_alignment_metrics (sequence_file_id);
-CREATE INDEX idx_pam_pangenome_graph_id ON public.pangenome_alignment_metrics (pangenome_graph_id);
-CREATE INDEX idx_pam_metric_level ON public.pangenome_alignment_metrics (metric_level);
-CREATE INDEX idx_pam_pangenome_path_id ON public.pangenome_alignment_metrics (pangenome_path_id);
-CREATE INDEX idx_pam_metrics_date ON public.pangenome_alignment_metrics (metrics_date);
-
-ALTER TABLE public.sequence_file
- ADD COLUMN pangenome_graph_id INTEGER REFERENCES public.pangenome_graph(id);
-
--- UNUSED Table
-DROP TABLE public.quality_metrics;
-
-# --- !Downs
-
-ALTER TABLE public.sequence_file DROP COLUMN pangenome_graph_id;
-
-DROP TABLE public.pangenome_alignment_metrics;
\ No newline at end of file
diff --git a/conf/evolutions/default/9.sql b/conf/evolutions/default/9.sql
deleted file mode 100644
index 3a2c53c0..00000000
--- a/conf/evolutions/default/9.sql
+++ /dev/null
@@ -1,47 +0,0 @@
-# --- !Ups
---- Normalizing the metrics since Slick can't deal with that many columns
-DROP TABLE IF EXISTS public.pangenome_alignment_metrics;
-
--- New Table 1: public.pangenome_alignment_metadata
--- Stores general metadata and region info about the alignment metrics
-CREATE TABLE public.pangenome_alignment_metadata
-(
- id BIGSERIAL PRIMARY KEY,
- sequence_file_id BIGINT NOT NULL REFERENCES public.sequence_file (id) ON DELETE CASCADE,
- pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id),
- metric_level VARCHAR(50) NOT NULL CHECK (metric_level IN ('GRAPH_OVERALL', 'PATH', 'NODE', 'REGION')),
- pangenome_path_id INTEGER REFERENCES public.pangenome_path (id),
- pangenome_node_id INTEGER REFERENCES public.pangenome_node (id),
- region_start_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, start of the specific segment
- region_end_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, end of the specific segment
- region_name VARCHAR(255),
- region_length_bp BIGINT,
- metrics_date TIMESTAMP NOT NULL DEFAULT NOW(),
- analysis_tool VARCHAR(255) NOT NULL,
- analysis_tool_version VARCHAR(50),
- notes TEXT,
- metadata JSONB
-);
-
--- New Table 2: public.pangenome_alignment_coverage
--- Stores detailed coverage and quality metrics, linked to pangenome_alignment_metadata
-CREATE TABLE public.pangenome_alignment_coverage
-(
- alignment_metadata_id BIGINT PRIMARY KEY REFERENCES public.pangenome_alignment_metadata (id) ON DELETE CASCADE,
- mean_depth DOUBLE PRECISION,
- median_depth DOUBLE PRECISION,
- percent_coverage_at_1x DOUBLE PRECISION,
- percent_coverage_at_5x DOUBLE PRECISION,
- percent_coverage_at_10x DOUBLE PRECISION,
- percent_coverage_at_20x DOUBLE PRECISION,
- percent_coverage_at_30x DOUBLE PRECISION,
- bases_no_coverage BIGINT,
- bases_low_quality_mapping BIGINT,
- bases_callable BIGINT,
- mean_mapping_quality DOUBLE PRECISION
-);
-
-# --- !Downs
-
-drop table public.pangenome_alignment_coverage;
-drop table public.pangenome_alignment_metadata;
\ No newline at end of file
diff --git a/conf/evolutions/metadata/1.sql b/conf/evolutions/metadata/1.sql
deleted file mode 100644
index 16d7dfba..00000000
--- a/conf/evolutions/metadata/1.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-# PDS Registrations schema
-# --- !Ups
-
-CREATE TABLE pds_registrations (
- did TEXT PRIMARY KEY,
- pds_url TEXT NOT NULL,
- handle TEXT NOT NULL,
- last_commit_cid TEXT,
- last_commit_seq BIGINT DEFAULT 0,
- cursor BIGINT NOT NULL DEFAULT 0,
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX pds_registrations_handle_idx ON pds_registrations (handle);
-CREATE INDEX pds_registrations_last_commit_cid_idx ON pds_registrations (last_commit_cid);
-
-# --- !Downs
-
-DROP TABLE IF EXISTS pds_registrations;
\ No newline at end of file
diff --git a/conf/evolutions/metadata/2.sql b/conf/evolutions/metadata/2.sql
deleted file mode 100644
index 1259aa32..00000000
--- a/conf/evolutions/metadata/2.sql
+++ /dev/null
@@ -1,13 +0,0 @@
-# --- !Ups
-ALTER TABLE pds_registrations ADD COLUMN leased_by_instance_id TEXT NULL;
-ALTER TABLE pds_registrations ADD COLUMN lease_expires_at TIMESTAMPTZ NULL;
-ALTER TABLE pds_registrations ADD COLUMN processing_status TEXT NOT NULL DEFAULT 'idle';
-
-CREATE INDEX pds_registrations_lease_expires_at_idx ON pds_registrations (lease_expires_at);
-
-# --- !Downs
-DROP INDEX pds_registrations_lease_expires_at_idx;
-
-ALTER TABLE pds_registrations DROP COLUMN processing_status;
-ALTER TABLE pds_registrations DROP COLUMN lease_expires_at;
-ALTER TABLE pds_registrations DROP COLUMN leased_by_instance_id;
diff --git a/conf/evolutions/metadata/3.sql b/conf/evolutions/metadata/3.sql
deleted file mode 100644
index 022dce3a..00000000
--- a/conf/evolutions/metadata/3.sql
+++ /dev/null
@@ -1,62 +0,0 @@
-# PDS fleet management — status tracking, heartbeat, software versions, capabilities
-
-# --- !Ups
-
-CREATE TABLE pds_node (
- id SERIAL PRIMARY KEY,
- did TEXT NOT NULL UNIQUE,
- pds_url TEXT NOT NULL,
- handle TEXT,
- node_name TEXT,
- software_version TEXT,
- status TEXT NOT NULL DEFAULT 'UNKNOWN'
- CHECK (status IN ('ONLINE', 'OFFLINE', 'BUSY', 'ERROR', 'UNKNOWN')),
- capabilities JSONB NOT NULL DEFAULT '{}',
- last_heartbeat TIMESTAMPTZ,
- last_commit_cid TEXT,
- last_commit_rev TEXT,
- ip_address TEXT,
- os_info TEXT,
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX idx_pds_node_status ON pds_node(status);
-CREATE INDEX idx_pds_node_last_heartbeat ON pds_node(last_heartbeat);
-CREATE INDEX idx_pds_node_software_version ON pds_node(software_version);
-
-CREATE TABLE pds_heartbeat_log (
- id SERIAL PRIMARY KEY,
- pds_node_id INTEGER NOT NULL REFERENCES pds_node(id),
- status TEXT NOT NULL
- CHECK (status IN ('ONLINE', 'OFFLINE', 'BUSY', 'ERROR', 'UNKNOWN')),
- software_version TEXT,
- load_metrics JSONB,
- processing_queue_size INTEGER DEFAULT 0,
- error_message TEXT,
- recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX idx_pds_heartbeat_log_node ON pds_heartbeat_log(pds_node_id);
-CREATE INDEX idx_pds_heartbeat_log_recorded_at ON pds_heartbeat_log(recorded_at);
-
-CREATE TABLE pds_fleet_config (
- id SERIAL PRIMARY KEY,
- config_key TEXT NOT NULL UNIQUE,
- config_value TEXT NOT NULL,
- description TEXT,
- updated_by TEXT,
- updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
-);
-
-INSERT INTO pds_fleet_config (config_key, config_value, description)
-VALUES
- ('target_software_version', '0.1.0', 'Target PDS software version for fleet'),
- ('heartbeat_interval_seconds', '300', 'Expected heartbeat interval in seconds'),
- ('offline_threshold_seconds', '900', 'Seconds without heartbeat before marking OFFLINE');
-
-# --- !Downs
-
-DROP TABLE IF EXISTS pds_fleet_config;
-DROP TABLE IF EXISTS pds_heartbeat_log;
-DROP TABLE IF EXISTS pds_node;
diff --git a/conf/evolutions/metadata/4.sql b/conf/evolutions/metadata/4.sql
deleted file mode 100644
index b0f23e45..00000000
--- a/conf/evolutions/metadata/4.sql
+++ /dev/null
@@ -1,36 +0,0 @@
-# Submission provenance — audit which PDS proposed specific variants and haplogroup calls
-
-# --- !Ups
-
-CREATE TABLE pds_submission (
- id SERIAL PRIMARY KEY,
- pds_node_id INTEGER NOT NULL REFERENCES pds_node(id),
- submission_type TEXT NOT NULL
- CHECK (submission_type IN ('HAPLOGROUP_CALL', 'VARIANT_CALL', 'BRANCH_PROPOSAL', 'PRIVATE_VARIANT', 'STR_PROFILE')),
- biosample_id INTEGER,
- biosample_guid UUID,
- proposed_value TEXT NOT NULL,
- confidence_score DOUBLE PRECISION,
- algorithm_version TEXT,
- software_version TEXT,
- payload JSONB,
- status TEXT NOT NULL DEFAULT 'PENDING'
- CHECK (status IN ('PENDING', 'ACCEPTED', 'REJECTED', 'SUPERSEDED')),
- reviewed_by TEXT,
- reviewed_at TIMESTAMPTZ,
- review_notes TEXT,
- at_uri TEXT,
- at_cid TEXT,
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
-);
-
-CREATE INDEX idx_pds_submission_node ON pds_submission(pds_node_id);
-CREATE INDEX idx_pds_submission_type ON pds_submission(submission_type);
-CREATE INDEX idx_pds_submission_biosample ON pds_submission(biosample_id);
-CREATE INDEX idx_pds_submission_biosample_guid ON pds_submission(biosample_guid);
-CREATE INDEX idx_pds_submission_status ON pds_submission(status);
-CREATE INDEX idx_pds_submission_created ON pds_submission(created_at);
-
-# --- !Downs
-
-DROP TABLE IF EXISTS pds_submission;
diff --git a/conf/logback.xml b/conf/logback.xml
deleted file mode 100644
index 9dc376c8..00000000
--- a/conf/logback.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${LOG_PATH}/application.log
-
-
-
- ${LOG_PATH}/application.%d{yyyy-MM-dd}.log.gz
-
-
- 30
-
-
- 3GB
-
-
-
- UTF-8
- %d{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) %cyan(%logger{36}) %magenta(%X{pekkoSource}) %msg%n
-
-
-
-
-
- UTF-8
- %d{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) %cyan(%logger{36}) %magenta(%X{pekkoSource}) %msg%n
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/conf/messages b/conf/messages
deleted file mode 100644
index 014a19fe..00000000
--- a/conf/messages
+++ /dev/null
@@ -1,335 +0,0 @@
-# General
-app.name = Decoding Us
-
-# Navigation
-nav.home = Home
-nav.about = About
-nav.ytree = Y-DNA Tree
-nav.mtree = MT-DNA Tree
-nav.references = References
-nav.tools = Research Tools
-nav.coverage = Coverage Benchmarks
-nav.contact = Contact
-nav.reputation = Reputation System
-nav.terms = Terms of Use
-nav.privacy = Privacy Policy
-nav.cookies = Cookie Usage
-nav.faq = FAQ
-nav.api = API Documentation
-nav.profile = Profile
-nav.candidates = Review Candidates
-nav.messages = Messages
-nav.supportAdmin = Support
-nav.logout = Logout
-nav.login = Login
-
-# Footer
-footer.copyright = Copyright © {0}, Decoding-Us.com
-
-# Home Page
-home.title = Decoding Us
-home.welcome = Welcome to Decoding Us
-
-# Reputation System
-reputation.title = Community Reputation System
-reputation.link = Reputation System
-
-# Legal
-legal.cookies.title = Cookie Usage
-legal.privacy.title = Privacy Policy
-legal.terms.title = Terms of Use
-legal.faq.title = FAQ
-
-# Cookie Consent Banner
-cookies.banner.message = We use cookies only for authentication. No tracking, no marketing, no third-party sharing.
-cookies.banner.learnMore = Learn more
-cookies.banner.accept = Accept
-
-# Generic
-generic.notAvailable = Not Available
-generic.submit = Submit
-generic.loading = Loading...
-generic.show = Show
-generic.hide = Hide
-generic.cancel = Cancel
-
-# Pagination
-pagination.pageOf = Page {0} of {1}
-pagination.previous = Previous
-pagination.next = Next
-pagination.first = First
-pagination.last = Last
-pagination.itemsPerPage = Items per page:
-
-# Biosample Report
-biosample.details = Biosample Details
-biosample.header.sampleId = Sample ID
-biosample.header.accession = Accession Number
-biosample.header.origin = Geographic Origin
-biosample.header.timePeriod = Time Period
-biosample.header.datingPeriod = Dating Period
-biosample.header.sex = Sex
-biosample.header.yHaplo = Y-DNA Haplogroup
-biosample.header.mtHaplo = mt-DNA Haplogroup
-biosample.header.population = Population Group
-biosample.analysisMethod = Analysis method: {0}
-
-# Map
-map.title = Biosamples Geographic Distribution
-map.loading = Loading samples...
-map.totalSamplesPrefix = Total Samples:
-map.totalSamples = Total Samples: {0} (this key is for informational purposes for now, should not be used in views)
-
-# Contact
-contact.title = Contact Us
-contact.name.label = Name
-contact.name.placeholder = Your name
-contact.name.help = Please enter your full name
-contact.email.label = Email
-contact.email.placeholder = your.email@example.com
-contact.email.help = We'll never share your email with anyone else
-contact.subject.label = Subject
-contact.subject.placeholder = Subject of your message
-contact.message.label = Message
-contact.message.placeholder = Your message
-contact.send = Send Message
-contact.authenticated.info = You're logged in. Your message will be linked to your account and you can track responses in your message history.
-contact.viewHistory = View Message History
-
-# Support - User Messages
-support.myMessages.title = My Messages
-support.myMessages.heading = My Message History
-support.myMessages.empty = You haven't sent any messages yet.
-support.newMessage = New Message
-support.status.new = New
-support.status.read = Read
-support.status.replied = Replied
-support.status.closed = Closed
-
-# Support - Admin
-support.admin.title = Support Messages
-support.admin.heading = Contact Message Management
-support.admin.filterStatus = Filter by status:
-support.admin.allStatuses = All Statuses
-support.admin.noMessages = No messages found.
-support.admin.table.status = Status
-support.admin.table.from = From
-support.admin.table.subject = Subject
-support.admin.table.date = Date
-support.admin.view = View
-support.admin.authenticated = User
-support.admin.registeredUser = Registered User
-support.admin.anonymous = Anonymous
-support.admin.messageDetail.title = Message Details
-support.admin.backToList = Back to Messages
-support.admin.receivedAt = Received
-support.admin.replies = Replies
-support.admin.repliedBy = Replied by
-support.admin.emailSent = Email sent
-support.admin.sendReply = Send Reply
-support.admin.replyPlaceholder = Type your reply here...
-support.admin.sendEmailCopy = Send email copy to
-support.admin.noEmailForAuth = This user is logged in. Replies will appear in their message history.
-support.admin.submitReply = Send Reply
-support.admin.senderInfo = Sender Information
-support.admin.type = Type
-support.admin.handle = Handle
-support.admin.userId = User ID
-support.admin.name = Name
-support.admin.email = Email
-support.admin.actions = Actions
-support.admin.markClosed = Mark as Closed
-support.admin.reopen = Reopen
-
-# Errors
-error.forbidden.title = Access Denied
-error.forbidden.heading = Access Denied
-error.notFound.title = Not Found
-error.notFound.heading = Page Not Found
-
-# Coverage
-coverage.subtitle = Aggregated coverage statistics grouped by laboratory, test type, and contig.
-coverage.selectLab = Select Sequencing Laboratory:
-coverage.selectLab.default = -- Select a lab --
-coverage.selectLab.prompt = Please select a laboratory to view benchmark data.
-coverage.metrics.title = Explanation of Metrics
-coverage.whyMatters = Why This Matters
-coverage.technicalDetails = Technical Details
-
-# Benchmarks
-benchmark.header.testType = Test Type
-benchmark.header.contig = Contig
-benchmark.header.meanReadLen = Mean Read Length
-benchmark.header.readLenRange = Read Length Range
-benchmark.header.meanInsertSize = Mean Insert Size
-benchmark.header.insertSizeRange = Insert Size Range
-benchmark.header.meanDepth = Mean Depth
-benchmark.header.noCoverage = Bases No Coverage
-benchmark.header.lowQuality = Low Quality Mapping
-benchmark.header.callable = Callable Bases
-benchmark.header.meanMappingQuality = Mean Mapping Quality
-benchmark.header.samples = Samples
-benchmark.noData = No benchmark data available for this laboratory.
-
-# Tree
-tree.legend.established = Established haplogroup
-tree.legend.updated = Updated in the last year
-tree.noData = No tree data available
-tree.loading = Loading haplogroup tree...
-tree.jumpTo = Jump to haplogroup:
-tree.go = Go
-tree.reRoot = Click to re-root
-tree.verticalCladogram = Vertical Cladogram
-tree.clickToSeeVariants = Click to see {0} defining Variants(s)
-
-# Sidebar
-sidebar.title = Defining Variant(s) for {0}
-sidebar.noVariants = No defining Variants found for {0}.
-sidebar.type = Type: {0}
-sidebar.refLocation = Reference Location(s):
-
-# References Page
-references.title = Publications
-references.viewMap = View Map
-references.search.placeholder = Search by title, author, DOI...
-references.search.help = Search publications by title, author name, journal, or DOI
-
-# Publication
-publication.authors = Authors:
-publication.authors.etAl = (See paper for full author list)
-publication.journal = Journal:
-publication.published = Published:
-publication.abstract = Abstract
-publication.additionalDetails = Additional Details
-publication.classification = Classification:
-publication.access = Publication Access:
-publication.impact = Impact Metrics:
-publication.cited = Cited: {0}
-publication.percentile = Percentile: {0}%
-publication.genomicStudies = Genomic Studies
-publication.sampleCount = Sample Count: {0}
-publication.showSamples = Show Samples
-publication.hideSamples = Hide Samples
-publication.directLink = Direct Link
-publication.source.ena = European Nucleotide Archive
-publication.source.ncbi = NCBI BioProject
-publication.list.notFound = No publications found matching "{0}". Try a different search term.
-publication.list.empty = No publications available.
-publication.list.total = {0} publications
-publication.submit.title = Submit Publication
-publication.submit.doi.label = DOI
-publication.submit.doi.placeholder = Enter DOI or DOI URL (e.g., 10.1234/example or https://doi.org/10.1234/example)
-publication.submit.doi.help = Enter either a DOI (10.1234/example) or the full DOI URL (https://doi.org/10.1234/example)
-publication.submit.ena.label = Sequence Archive Study Accession (Optional)
-publication.submit.ena.placeholder = Enter sequence archive study accession if available (e.g., PRJEB12345, PRJNA123456)
-publication.submit.ena.help = If the paper mentions a sequence archive study (e.g., from ENA or SRA), enter its accession here
-publication.submit.forceRefresh = Force refresh (Update even if publication already exists)
-
-# Publication Candidates (New)
-publicationCandidates.title = Publication Candidates Review
-publicationCandidates.heading = Publication Candidates for Review
-publicationCandidates.noCandidates = No pending publication candidates found.
-publicationCandidates.table.title = Title
-publicationCandidates.table.journal = Journal
-publicationCandidates.table.date = Publication Date
-publicationCandidates.table.relevance = Relevance Score
-publicationCandidates.table.status = Status
-publicationCandidates.table.actions = Actions
-publicationCandidates.action.accept = Accept
-publicationCandidates.action.reject = Reject
-publicationCandidates.action.reviewed = Reviewed
-publicationCandidates.confirmAccept = Are you sure you want to accept this candidate and import it as a new publication?
-publicationCandidates.confirmReject = Are you sure you want to reject this candidate?
-publicationCandidates.acceptSuccess = Candidate '{0}' accepted and imported successfully.
-publicationCandidates.acceptFailed = Failed to accept candidate or import publication.
-publicationCandidates.acceptError = Error accepting candidate: {0}
-publicationCandidates.rejectSuccess = Candidate rejected successfully.
-publicationCandidates.rejectFailed = Failed to reject candidate.
-publicationCandidates.rejectError = Error rejecting candidate: {0}
-publicationCandidates.promptRejectReason = Please enter a reason for rejecting this candidate.
-publicationCandidates.emptyRejectReasonAlert = Rejection reason cannot be empty.
-
-# Auth
-auth.login.title = Login
-auth.login.heading = Sign In
-auth.login.federation.title = Federated Login
-auth.login.federation.description = Sign in with your AT Protocol identity from any compatible provider (Bluesky, self-hosted PDS, or future DecodingUs accounts).
-auth.login.handle = Handle or DID
-auth.login.handle.placeholder = alice.bsky.social or did:plc:...
-auth.login.handle.help = Enter your full handle (e.g., alice.bsky.social) or DID. We'll find your identity provider automatically.
-auth.login.password = App Password
-auth.login.passwordHelp = Use an App Password from your identity provider. Learn more about App Passwords.
-auth.login.submit = Sign In
-
-# Auth - App Password Help
-auth.appPasswordHelp.title = What is an App Password?
-auth.appPasswordHelp.heading = What is an App Password?
-auth.appPasswordHelp.p1 = An App Password is a unique, one-time password generated by your AT Protocol identity provider (such as Bluesky, a self-hosted PDS, or other compatible services) specifically for third-party applications like this one. It allows you to grant access to an application without sharing your main account password.
-auth.appPasswordHelp.p2 = Using App Passwords enhances your security by:
-auth.appPasswordHelp.li1 = Allowing you to revoke access for a single application without changing your main password.
-auth.appPasswordHelp.li2 = Limiting the permissions an application might have (though for this app, it grants full session access).
-auth.appPasswordHelp.generateSteps = How to Generate an App Password:
-auth.appPasswordHelp.step1 = Open your AT Protocol client or identity provider's settings (e.g., bsky.app for Bluesky, or your self-hosted PDS admin panel).
-auth.appPasswordHelp.step2 = Navigate to Settings > App Passwords (or Privacy & Security > App Passwords).
-auth.appPasswordHelp.step3 = Click on Add App Password.
-auth.appPasswordHelp.step4 = Give it a descriptive name (e.g., "DecodingUs App").
-auth.appPasswordHelp.step5 = Copy the generated password string. This is the only time you will see it.
-auth.appPasswordHelp.note = This generated password is what you'll use in the Login form. Treat it like a regular password.
-
-# Profile
-profile.title = User Profile
-profile.heading = Your Profile
-profile.accountDetails = Account Details
-profile.handle = Handle
-profile.did = DID
-profile.editProfile = Edit Profile
-profile.displayName = Display Name
-profile.displayNameHelp = This name will be visible to other users in the messaging system.
-profile.save = Save Profile
-
-# Variant Browser
-nav.variants = Variant Browser
-variants.browser.title = Variant Browser
-variants.browser.heading = Variant Browser
-variants.browser.description = Search the Y-DNA and mtDNA variant database. Find SNPs by name, rsID, or alias.
-variants.browser.variants = Variants
-variants.browser.searchPlaceholder = Search by rsID, SNP name, or alias (e.g., M269, rs9786076)...
-variants.browser.searchHelp = Enter at least 2 characters to search. Searches variant names, rsIDs, and aliases.
-variants.browser.selectVariant = Select a variant to view details
-variants.browser.foundMatching = Found {0} variant groups matching "{1}"
-variants.browser.showingTotal = Showing {0} total variant groups
-variants.browser.pageOf = Page {0} of {1}
-variants.browser.noResults = No variants found matching "{0}".
-variants.browser.enterSearch = Enter a search term to find variants.
-variants.browser.col.name = Name / rsID
-variants.browser.col.alleles = Anc/Der
-variants.browser.col.type = Type
-variants.browser.col.builds = Builds
-variants.browser.strandDiffers = Strand differs between builds
-
-# Variant Detail Panel
-variants.detail.rsId = rsID
-variants.detail.commonName = Common Name
-variants.detail.ancestral = Ancestral
-variants.detail.derived = Derived
-variants.detail.type = Type
-variants.detail.status = Naming Status
-variants.detail.altNames = Alternative Names
-variants.detail.source = Source
-variants.detail.refBuilds = Reference Builds
-variants.detail.strandDiff = Strand difference (reverse complement)
-variants.detail.build = Build
-variants.detail.position = Position
-variants.detail.alleles = Alleles
-variants.detail.motifRepeats = Motif / Repeats
-variants.detail.usedBy = Used By Haplogroups
-variants.detail.noHaplogroups = This variant is not associated with any haplogroups.
-variants.detail.more = more
-variants.detail.aliasType.snpNames = SNP Names
-variants.detail.aliasType.dbsnp = dbSNP IDs
-
-# Language
-lang.switch = Language
-lang.en = English
-lang.fr = Français
-lang.es = Español
diff --git a/conf/messages.es b/conf/messages.es
deleted file mode 100644
index bde14d94..00000000
--- a/conf/messages.es
+++ /dev/null
@@ -1,335 +0,0 @@
-# General
-app.name = Decoding Us
-
-# Navigation
-nav.home = Inicio
-nav.about = Acerca de
-nav.ytree = Árbol Y-ADN
-nav.mtree = Árbol ADN-mt
-nav.references = Referencias
-nav.tools = Herramientas de investigación
-nav.coverage = Benchmarks de cobertura
-nav.contact = Contacto
-nav.reputation = Sistema de reputación
-nav.terms = Condiciones de uso
-nav.privacy = Política de privacidad
-nav.cookies = Uso de cookies
-nav.faq = Preguntas frecuentes
-nav.api = Documentación de la API
-nav.profile = Perfil
-nav.candidates = Revisar candidatos
-nav.messages = Mensajes
-nav.supportAdmin = Soporte
-nav.logout = Cerrar sesión
-nav.login = Iniciar sesión
-
-# Footer
-footer.copyright = Copyright © {0}, Decoding-Us.com
-
-# Home Page
-home.title = Decoding Us
-home.welcome = Bienvenido a Decoding Us
-
-# Reputation System
-reputation.title = Sistema de reputación comunitaria
-reputation.link = Sistema de reputación
-
-# Legal
-legal.cookies.title = Uso de cookies
-legal.privacy.title = Política de privacidad
-legal.terms.title = Condiciones de uso
-legal.faq.title = Preguntas frecuentes
-
-# Cookie Consent Banner
-cookies.banner.message = Usamos cookies únicamente para la autenticación. Sin rastreo, sin marketing, sin compartir con terceros.
-cookies.banner.learnMore = Más información
-cookies.banner.accept = Aceptar
-
-# Generic
-generic.notAvailable = No disponible
-generic.submit = Enviar
-generic.loading = Cargando...
-generic.show = Mostrar
-generic.hide = Ocultar
-generic.cancel = Cancelar
-
-# Pagination
-pagination.pageOf = Página {0} de {1}
-pagination.previous = Anterior
-pagination.next = Siguiente
-pagination.first = Primero
-pagination.last = Último
-pagination.itemsPerPage = Elementos por página:
-
-# Biosample Report
-biosample.details = Detalles de la muestra
-biosample.header.sampleId = Identificador de la muestra
-biosample.header.accession = Número de acceso
-biosample.header.origin = Origen geográfico
-biosample.header.timePeriod = Período
-biosample.header.datingPeriod = Período de datación
-biosample.header.sex = Sexo
-biosample.header.yHaplo = Haplogrupo Y-ADN
-biosample.header.mtHaplo = Haplogrupo ADN-mt
-biosample.header.population = Grupo de población
-biosample.analysisMethod = Método de análisis: {0}
-
-# Map
-map.title = Distribución geográfica de las muestras
-map.loading = Cargando muestras...
-map.totalSamplesPrefix = Total de muestras:
-map.totalSamples = Total de muestras: {0}
-
-# Contact
-contact.title = Contáctenos
-contact.name.label = Nombre
-contact.name.placeholder = Su nombre
-contact.name.help = Por favor, introduzca su nombre completo
-contact.email.label = Correo electrónico
-contact.email.placeholder = su.correo@ejemplo.com
-contact.email.help = Nunca compartiremos su correo electrónico con nadie
-contact.subject.label = Asunto
-contact.subject.placeholder = Asunto de su mensaje
-contact.message.label = Mensaje
-contact.message.placeholder = Su mensaje
-contact.send = Enviar mensaje
-contact.authenticated.info = Ha iniciado sesión. Su mensaje se vinculará a su cuenta y podrá seguir las respuestas en su historial.
-contact.viewHistory = Ver historial de mensajes
-
-# Support - User Messages
-support.myMessages.title = Mis mensajes
-support.myMessages.heading = Historial de mis mensajes
-support.myMessages.empty = Aún no ha enviado ningún mensaje.
-support.newMessage = Nuevo mensaje
-support.status.new = Nuevo
-support.status.read = Leído
-support.status.replied = Respondido
-support.status.closed = Cerrado
-
-# Support - Admin
-support.admin.title = Mensajes de soporte
-support.admin.heading = Gestión de mensajes de contacto
-support.admin.filterStatus = Filtrar por estado:
-support.admin.allStatuses = Todos los estados
-support.admin.noMessages = No se encontraron mensajes.
-support.admin.table.status = Estado
-support.admin.table.from = De
-support.admin.table.subject = Asunto
-support.admin.table.date = Fecha
-support.admin.view = Ver
-support.admin.authenticated = Usuario
-support.admin.registeredUser = Usuario registrado
-support.admin.anonymous = Anónimo
-support.admin.messageDetail.title = Detalles del mensaje
-support.admin.backToList = Volver a los mensajes
-support.admin.receivedAt = Recibido
-support.admin.replies = Respuestas
-support.admin.repliedBy = Respondido por
-support.admin.emailSent = Correo enviado
-support.admin.sendReply = Enviar respuesta
-support.admin.replyPlaceholder = Escriba su respuesta aquí...
-support.admin.sendEmailCopy = Enviar copia por correo a
-support.admin.noEmailForAuth = Este usuario ha iniciado sesión. Las respuestas aparecerán en su historial de mensajes.
-support.admin.submitReply = Enviar respuesta
-support.admin.senderInfo = Información del remitente
-support.admin.type = Tipo
-support.admin.handle = Identificador
-support.admin.userId = ID de usuario
-support.admin.name = Nombre
-support.admin.email = Correo electrónico
-support.admin.actions = Acciones
-support.admin.markClosed = Marcar como cerrado
-support.admin.reopen = Reabrir
-
-# Errors
-error.forbidden.title = Acceso denegado
-error.forbidden.heading = Acceso denegado
-error.notFound.title = No encontrado
-error.notFound.heading = Página no encontrada
-
-# Coverage
-coverage.subtitle = Estadísticas de cobertura agregadas por laboratorio, tipo de prueba y contig.
-coverage.selectLab = Seleccionar laboratorio de secuenciación:
-coverage.selectLab.default = -- Seleccionar un laboratorio --
-coverage.selectLab.prompt = Por favor, seleccione un laboratorio para ver los datos de benchmark.
-coverage.metrics.title = Explicación de las métricas
-coverage.whyMatters = Por qué es importante
-coverage.technicalDetails = Detalles técnicos
-
-# Benchmarks
-benchmark.header.testType = Tipo de prueba
-benchmark.header.contig = Contig
-benchmark.header.meanReadLen = Longitud media de lectura
-benchmark.header.readLenRange = Rango de longitud de lectura
-benchmark.header.meanInsertSize = Tamaño medio de inserción
-benchmark.header.insertSizeRange = Rango de tamaño de inserción
-benchmark.header.meanDepth = Profundidad media
-benchmark.header.noCoverage = Bases sin cobertura
-benchmark.header.lowQuality = Mapeo de baja calidad
-benchmark.header.callable = Bases utilizables
-benchmark.header.meanMappingQuality = Calidad media de mapeo
-benchmark.header.samples = Muestras
-benchmark.noData = No hay datos de benchmark disponibles para este laboratorio.
-
-# Tree
-tree.legend.established = Haplogrupo establecido
-tree.legend.updated = Actualizado en el último año
-tree.noData = No hay datos del árbol disponibles
-tree.loading = Cargando árbol de haplogrupos...
-tree.jumpTo = Ir al haplogrupo:
-tree.go = Ir
-tree.reRoot = Clic para cambiar la raíz
-tree.verticalCladogram = Cladograma vertical
-tree.clickToSeeVariants = Clic para ver {0} variante(s) definidora(s)
-
-# Sidebar
-sidebar.title = Variante(s) definidora(s) para {0}
-sidebar.noVariants = No se encontraron variantes definidoras para {0}.
-sidebar.type = Tipo: {0}
-sidebar.refLocation = Ubicación(es) de referencia:
-
-# References Page
-references.title = Publicaciones
-references.viewMap = Ver mapa
-references.search.placeholder = Buscar por título, autor, DOI...
-references.search.help = Buscar publicaciones por título, nombre de autor, revista o DOI
-
-# Publication
-publication.authors = Autores:
-publication.authors.etAl = (Ver el artículo para la lista completa de autores)
-publication.journal = Revista:
-publication.published = Publicado:
-publication.abstract = Resumen
-publication.additionalDetails = Detalles adicionales
-publication.classification = Clasificación:
-publication.access = Acceso a la publicación:
-publication.impact = Métricas de impacto:
-publication.cited = Citado: {0}
-publication.percentile = Percentil: {0}%
-publication.genomicStudies = Estudios genómicos
-publication.sampleCount = Cantidad de muestras: {0}
-publication.showSamples = Mostrar muestras
-publication.hideSamples = Ocultar muestras
-publication.directLink = Enlace directo
-publication.source.ena = Archivo Europeo de Nucleótidos
-publication.source.ncbi = BioProyecto NCBI
-publication.list.notFound = No se encontraron publicaciones que coincidan con «{0}». Intente con otro término de búsqueda.
-publication.list.empty = No hay publicaciones disponibles.
-publication.list.total = {0} publicaciones
-publication.submit.title = Enviar publicación
-publication.submit.doi.label = DOI
-publication.submit.doi.placeholder = Ingrese el DOI o la URL del DOI (ej.: 10.1234/ejemplo o https://doi.org/10.1234/ejemplo)
-publication.submit.doi.help = Ingrese un DOI (10.1234/ejemplo) o la URL completa del DOI (https://doi.org/10.1234/ejemplo)
-publication.submit.ena.label = Acceso de estudio de archivo de secuencias (opcional)
-publication.submit.ena.placeholder = Ingrese el acceso de estudio de archivo si está disponible (ej.: PRJEB12345, PRJNA123456)
-publication.submit.ena.help = Si el artículo menciona un estudio de archivo de secuencias (ej.: ENA o SRA), ingrese su acceso aquí
-publication.submit.forceRefresh = Forzar actualización (actualizar aunque la publicación ya exista)
-
-# Publication Candidates
-publicationCandidates.title = Revisión de publicaciones candidatas
-publicationCandidates.heading = Publicaciones candidatas para revisar
-publicationCandidates.noCandidates = No se encontraron publicaciones candidatas pendientes.
-publicationCandidates.table.title = Título
-publicationCandidates.table.journal = Revista
-publicationCandidates.table.date = Fecha de publicación
-publicationCandidates.table.relevance = Puntuación de relevancia
-publicationCandidates.table.status = Estado
-publicationCandidates.table.actions = Acciones
-publicationCandidates.action.accept = Aceptar
-publicationCandidates.action.reject = Rechazar
-publicationCandidates.action.reviewed = Revisado
-publicationCandidates.confirmAccept = ¿Está seguro de que desea aceptar este candidato e importarlo como nueva publicación?
-publicationCandidates.confirmReject = ¿Está seguro de que desea rechazar este candidato?
-publicationCandidates.acceptSuccess = Candidato «{0}» aceptado e importado con éxito.
-publicationCandidates.acceptFailed = Error al aceptar el candidato o importar la publicación.
-publicationCandidates.acceptError = Error al aceptar el candidato: {0}
-publicationCandidates.rejectSuccess = Candidato rechazado con éxito.
-publicationCandidates.rejectFailed = Error al rechazar el candidato.
-publicationCandidates.rejectError = Error al rechazar el candidato: {0}
-publicationCandidates.promptRejectReason = Por favor, ingrese una razón para rechazar este candidato.
-publicationCandidates.emptyRejectReasonAlert = La razón del rechazo no puede estar vacía.
-
-# Auth
-auth.login.title = Iniciar sesión
-auth.login.heading = Iniciar sesión
-auth.login.federation.title = Inicio de sesión federado
-auth.login.federation.description = Inicie sesión con su identidad AT Protocol desde cualquier proveedor compatible (Bluesky, PDS auto-alojado o futuras cuentas DecodingUs).
-auth.login.handle = Identificador o DID
-auth.login.handle.placeholder = alice.bsky.social o did:plc:...
-auth.login.handle.help = Ingrese su identificador completo (ej.: alice.bsky.social) o DID. Encontraremos su proveedor de identidad automáticamente.
-auth.login.password = Contraseña de aplicación
-auth.login.passwordHelp = Use una contraseña de aplicación de su proveedor de identidad. Más información sobre las contraseñas de aplicación.
-auth.login.submit = Iniciar sesión
-
-# Auth - App Password Help
-auth.appPasswordHelp.title = ¿Qué es una contraseña de aplicación?
-auth.appPasswordHelp.heading = ¿Qué es una contraseña de aplicación?
-auth.appPasswordHelp.p1 = Una contraseña de aplicación es una contraseña única generada por su proveedor de identidad AT Protocol (como Bluesky, un PDS auto-alojado u otros servicios compatibles) específicamente para aplicaciones de terceros como esta. Le permite otorgar acceso a una aplicación sin compartir su contraseña principal.
-auth.appPasswordHelp.p2 = El uso de contraseñas de aplicación mejora su seguridad al:
-auth.appPasswordHelp.li1 = Permitirle revocar el acceso de una sola aplicación sin cambiar su contraseña principal.
-auth.appPasswordHelp.li2 = Limitar los permisos que una aplicación puede tener (aunque para esta aplicación, otorga acceso completo a la sesión).
-auth.appPasswordHelp.generateSteps = Cómo generar una contraseña de aplicación:
-auth.appPasswordHelp.step1 = Abra la configuración de su cliente AT Protocol o proveedor de identidad (ej.: bsky.app para Bluesky, o el panel de administración de su PDS auto-alojado).
-auth.appPasswordHelp.step2 = Navegue a Configuración > Contraseñas de aplicación (o Privacidad y seguridad > Contraseñas de aplicación).
-auth.appPasswordHelp.step3 = Haga clic en Agregar contraseña de aplicación.
-auth.appPasswordHelp.step4 = Asígnele un nombre descriptivo (ej.: "App DecodingUs").
-auth.appPasswordHelp.step5 = Copie la contraseña generada. Esta es la única vez que la verá.
-auth.appPasswordHelp.note = Esta contraseña generada es la que usará en el formulario de inicio de sesión. Trátela como una contraseña normal.
-
-# Profile
-profile.title = Perfil de usuario
-profile.heading = Su perfil
-profile.accountDetails = Detalles de la cuenta
-profile.handle = Identificador
-profile.did = DID
-profile.editProfile = Editar perfil
-profile.displayName = Nombre para mostrar
-profile.displayNameHelp = Este nombre será visible para otros usuarios en el sistema de mensajería.
-profile.save = Guardar perfil
-
-# Variant Browser
-nav.variants = Navegador de variantes
-variants.browser.title = Navegador de variantes
-variants.browser.heading = Navegador de variantes
-variants.browser.description = Busque en la base de datos de variantes de Y-ADN y ADN-mt. Encuentre SNP por nombre, rsID o alias.
-variants.browser.variants = Variantes
-variants.browser.searchPlaceholder = Buscar por rsID, nombre de SNP o alias (ej.: M269, rs9786076)...
-variants.browser.searchHelp = Ingrese al menos 2 caracteres para buscar. Busca en nombres de variantes, rsID y alias.
-variants.browser.selectVariant = Seleccione una variante para ver los detalles
-variants.browser.foundMatching = Se encontraron {0} grupos de variantes para «{1}»
-variants.browser.showingTotal = Mostrando {0} grupos de variantes en total
-variants.browser.pageOf = Página {0} de {1}
-variants.browser.noResults = No se encontraron variantes para «{0}».
-variants.browser.enterSearch = Ingrese un término de búsqueda para encontrar variantes.
-variants.browser.col.name = Nombre / rsID
-variants.browser.col.alleles = Anc/Der
-variants.browser.col.type = Tipo
-variants.browser.col.builds = Builds
-variants.browser.strandDiffers = La hebra difiere entre builds
-
-# Variant Detail Panel
-variants.detail.rsId = rsID
-variants.detail.commonName = Nombre común
-variants.detail.ancestral = Ancestral
-variants.detail.derived = Derivado
-variants.detail.type = Tipo
-variants.detail.status = Estado de denominación
-variants.detail.altNames = Nombres alternativos
-variants.detail.source = Fuente
-variants.detail.refBuilds = Builds de referencia
-variants.detail.strandDiff = Diferencia de hebra (complemento inverso)
-variants.detail.build = Build
-variants.detail.position = Posición
-variants.detail.alleles = Alelos
-variants.detail.motifRepeats = Motivo / Repeticiones
-variants.detail.usedBy = Usado por haplogrupos
-variants.detail.noHaplogroups = Esta variante no está asociada con ningún haplogrupo.
-variants.detail.more = más
-variants.detail.aliasType.snpNames = Nombres de SNP
-variants.detail.aliasType.dbsnp = Identificadores dbSNP
-
-# Language
-lang.switch = Idioma
-lang.en = English
-lang.fr = Français
-lang.es = Español
diff --git a/conf/messages.fr b/conf/messages.fr
deleted file mode 100644
index 850b0026..00000000
--- a/conf/messages.fr
+++ /dev/null
@@ -1,335 +0,0 @@
-# General
-app.name = Decoding Us
-
-# Navigation
-nav.home = Accueil
-nav.about = À propos
-nav.ytree = Arbre Y-ADN
-nav.mtree = Arbre ADN-mt
-nav.references = Références
-nav.tools = Outils de recherche
-nav.coverage = Benchmarks de couverture
-nav.contact = Contact
-nav.reputation = Système de réputation
-nav.terms = Conditions d''utilisation
-nav.privacy = Politique de confidentialité
-nav.cookies = Utilisation des cookies
-nav.faq = FAQ
-nav.api = Documentation de l''API
-nav.profile = Profil
-nav.candidates = Examiner les candidats
-nav.messages = Messages
-nav.supportAdmin = Support
-nav.logout = Déconnexion
-nav.login = Connexion
-
-# Footer
-footer.copyright = Copyright © {0}, Decoding-Us.com
-
-# Home Page
-home.title = Decoding Us
-home.welcome = Bienvenue sur Decoding Us
-
-# Reputation System
-reputation.title = Système de réputation communautaire
-reputation.link = Système de réputation
-
-# Legal
-legal.cookies.title = Utilisation des cookies
-legal.privacy.title = Politique de confidentialité
-legal.terms.title = Conditions d''utilisation
-legal.faq.title = FAQ
-
-# Cookie Consent Banner
-cookies.banner.message = Nous utilisons les cookies uniquement pour l''authentification. Pas de suivi, pas de marketing, pas de partage avec des tiers.
-cookies.banner.learnMore = En savoir plus
-cookies.banner.accept = Accepter
-
-# Generic
-generic.notAvailable = Non disponible
-generic.submit = Envoyer
-generic.loading = Chargement...
-generic.show = Afficher
-generic.hide = Masquer
-generic.cancel = Annuler
-
-# Pagination
-pagination.pageOf = Page {0} sur {1}
-pagination.previous = Précédent
-pagination.next = Suivant
-pagination.first = Premier
-pagination.last = Dernier
-pagination.itemsPerPage = Éléments par page :
-
-# Biosample Report
-biosample.details = Détails de l''échantillon
-biosample.header.sampleId = Identifiant de l''échantillon
-biosample.header.accession = Numéro d''accession
-biosample.header.origin = Origine géographique
-biosample.header.timePeriod = Période
-biosample.header.datingPeriod = Période de datation
-biosample.header.sex = Sexe
-biosample.header.yHaplo = Haplogroupe Y-ADN
-biosample.header.mtHaplo = Haplogroupe ADN-mt
-biosample.header.population = Groupe de population
-biosample.analysisMethod = Méthode d''analyse : {0}
-
-# Map
-map.title = Distribution géographique des échantillons
-map.loading = Chargement des échantillons...
-map.totalSamplesPrefix = Total des échantillons :
-map.totalSamples = Total des échantillons : {0}
-
-# Contact
-contact.title = Nous contacter
-contact.name.label = Nom
-contact.name.placeholder = Votre nom
-contact.name.help = Veuillez entrer votre nom complet
-contact.email.label = Courriel
-contact.email.placeholder = votre.courriel@exemple.com
-contact.email.help = Nous ne partagerons jamais votre courriel avec qui que ce soit
-contact.subject.label = Objet
-contact.subject.placeholder = Objet de votre message
-contact.message.label = Message
-contact.message.placeholder = Votre message
-contact.send = Envoyer le message
-contact.authenticated.info = Vous êtes connecté(e). Votre message sera lié à votre compte et vous pourrez suivre les réponses dans votre historique.
-contact.viewHistory = Voir l''historique des messages
-
-# Support - User Messages
-support.myMessages.title = Mes messages
-support.myMessages.heading = Historique de mes messages
-support.myMessages.empty = Vous n''avez pas encore envoyé de messages.
-support.newMessage = Nouveau message
-support.status.new = Nouveau
-support.status.read = Lu
-support.status.replied = Répondu
-support.status.closed = Fermé
-
-# Support - Admin
-support.admin.title = Messages de support
-support.admin.heading = Gestion des messages de contact
-support.admin.filterStatus = Filtrer par statut :
-support.admin.allStatuses = Tous les statuts
-support.admin.noMessages = Aucun message trouvé.
-support.admin.table.status = Statut
-support.admin.table.from = De
-support.admin.table.subject = Objet
-support.admin.table.date = Date
-support.admin.view = Voir
-support.admin.authenticated = Utilisateur
-support.admin.registeredUser = Utilisateur enregistré
-support.admin.anonymous = Anonyme
-support.admin.messageDetail.title = Détails du message
-support.admin.backToList = Retour aux messages
-support.admin.receivedAt = Reçu le
-support.admin.replies = Réponses
-support.admin.repliedBy = Répondu par
-support.admin.emailSent = Courriel envoyé
-support.admin.sendReply = Envoyer une réponse
-support.admin.replyPlaceholder = Tapez votre réponse ici...
-support.admin.sendEmailCopy = Envoyer une copie par courriel à
-support.admin.noEmailForAuth = Cet utilisateur est connecté. Les réponses apparaîtront dans son historique de messages.
-support.admin.submitReply = Envoyer la réponse
-support.admin.senderInfo = Informations sur l''expéditeur
-support.admin.type = Type
-support.admin.handle = Identifiant
-support.admin.userId = ID utilisateur
-support.admin.name = Nom
-support.admin.email = Courriel
-support.admin.actions = Actions
-support.admin.markClosed = Marquer comme fermé
-support.admin.reopen = Rouvrir
-
-# Errors
-error.forbidden.title = Accès refusé
-error.forbidden.heading = Accès refusé
-error.notFound.title = Non trouvé
-error.notFound.heading = Page non trouvée
-
-# Coverage
-coverage.subtitle = Statistiques de couverture agrégées par laboratoire, type de test et contig.
-coverage.selectLab = Sélectionner un laboratoire de séquençage :
-coverage.selectLab.default = -- Sélectionner un laboratoire --
-coverage.selectLab.prompt = Veuillez sélectionner un laboratoire pour consulter les données de benchmark.
-coverage.metrics.title = Explication des métriques
-coverage.whyMatters = Pourquoi c''est important
-coverage.technicalDetails = Détails techniques
-
-# Benchmarks
-benchmark.header.testType = Type de test
-benchmark.header.contig = Contig
-benchmark.header.meanReadLen = Longueur moyenne des lectures
-benchmark.header.readLenRange = Plage de longueur des lectures
-benchmark.header.meanInsertSize = Taille moyenne d''insertion
-benchmark.header.insertSizeRange = Plage de taille d''insertion
-benchmark.header.meanDepth = Profondeur moyenne
-benchmark.header.noCoverage = Bases sans couverture
-benchmark.header.lowQuality = Mapping de faible qualité
-benchmark.header.callable = Bases appelables
-benchmark.header.meanMappingQuality = Qualité moyenne de mapping
-benchmark.header.samples = Échantillons
-benchmark.noData = Aucune donnée de benchmark disponible pour ce laboratoire.
-
-# Tree
-tree.legend.established = Haplogroupe établi
-tree.legend.updated = Mis à jour au cours de la dernière année
-tree.noData = Aucune donnée d''arbre disponible
-tree.loading = Chargement de l''arbre des haplogroupes...
-tree.jumpTo = Aller à l''haplogroupe :
-tree.go = Aller
-tree.reRoot = Cliquer pour changer la racine
-tree.verticalCladogram = Cladogramme vertical
-tree.clickToSeeVariants = Cliquer pour voir {0} variant(s) définissant(s)
-
-# Sidebar
-sidebar.title = Variant(s) définissant(s) pour {0}
-sidebar.noVariants = Aucun variant définissant trouvé pour {0}.
-sidebar.type = Type : {0}
-sidebar.refLocation = Emplacement(s) de référence :
-
-# References Page
-references.title = Publications
-references.viewMap = Voir la carte
-references.search.placeholder = Rechercher par titre, auteur, DOI...
-references.search.help = Rechercher des publications par titre, nom d''auteur, revue ou DOI
-
-# Publication
-publication.authors = Auteurs :
-publication.authors.etAl = (Voir l''article pour la liste complète des auteurs)
-publication.journal = Revue :
-publication.published = Publié :
-publication.abstract = Résumé
-publication.additionalDetails = Détails supplémentaires
-publication.classification = Classification :
-publication.access = Accès à la publication :
-publication.impact = Métriques d''impact :
-publication.cited = Cité : {0}
-publication.percentile = Percentile : {0}%
-publication.genomicStudies = Études génomiques
-publication.sampleCount = Nombre d''échantillons : {0}
-publication.showSamples = Afficher les échantillons
-publication.hideSamples = Masquer les échantillons
-publication.directLink = Lien direct
-publication.source.ena = Archive européenne des nucléotides
-publication.source.ncbi = BioProject NCBI
-publication.list.notFound = Aucune publication trouvée correspondant à « {0} ». Essayez un autre terme de recherche.
-publication.list.empty = Aucune publication disponible.
-publication.list.total = {0} publications
-publication.submit.title = Soumettre une publication
-publication.submit.doi.label = DOI
-publication.submit.doi.placeholder = Entrez le DOI ou l''URL du DOI (ex. : 10.1234/exemple ou https://doi.org/10.1234/exemple)
-publication.submit.doi.help = Entrez un DOI (10.1234/exemple) ou l''URL complète du DOI (https://doi.org/10.1234/exemple)
-publication.submit.ena.label = Accession d''étude d''archive de séquences (optionnel)
-publication.submit.ena.placeholder = Entrez l''accession d''étude d''archive si disponible (ex. : PRJEB12345, PRJNA123456)
-publication.submit.ena.help = Si l''article mentionne une étude d''archive de séquences (ex. : ENA ou SRA), entrez son accession ici
-publication.submit.forceRefresh = Forcer l''actualisation (mettre à jour même si la publication existe déjà)
-
-# Publication Candidates
-publicationCandidates.title = Revue des publications candidates
-publicationCandidates.heading = Publications candidates à examiner
-publicationCandidates.noCandidates = Aucune publication candidate en attente.
-publicationCandidates.table.title = Titre
-publicationCandidates.table.journal = Revue
-publicationCandidates.table.date = Date de publication
-publicationCandidates.table.relevance = Score de pertinence
-publicationCandidates.table.status = Statut
-publicationCandidates.table.actions = Actions
-publicationCandidates.action.accept = Accepter
-publicationCandidates.action.reject = Rejeter
-publicationCandidates.action.reviewed = Examiné
-publicationCandidates.confirmAccept = Êtes-vous sûr(e) de vouloir accepter ce candidat et l''importer comme nouvelle publication ?
-publicationCandidates.confirmReject = Êtes-vous sûr(e) de vouloir rejeter ce candidat ?
-publicationCandidates.acceptSuccess = Candidat « {0} » accepté et importé avec succès.
-publicationCandidates.acceptFailed = Échec de l''acceptation du candidat ou de l''importation de la publication.
-publicationCandidates.acceptError = Erreur lors de l''acceptation du candidat : {0}
-publicationCandidates.rejectSuccess = Candidat rejeté avec succès.
-publicationCandidates.rejectFailed = Échec du rejet du candidat.
-publicationCandidates.rejectError = Erreur lors du rejet du candidat : {0}
-publicationCandidates.promptRejectReason = Veuillez entrer une raison pour rejeter ce candidat.
-publicationCandidates.emptyRejectReasonAlert = La raison du rejet ne peut pas être vide.
-
-# Auth
-auth.login.title = Connexion
-auth.login.heading = Connexion
-auth.login.federation.title = Connexion fédérée
-auth.login.federation.description = Connectez-vous avec votre identité AT Protocol depuis n''importe quel fournisseur compatible (Bluesky, PDS auto-hébergé ou futurs comptes DecodingUs).
-auth.login.handle = Identifiant ou DID
-auth.login.handle.placeholder = alice.bsky.social ou did:plc:...
-auth.login.handle.help = Entrez votre identifiant complet (ex. : alice.bsky.social) ou DID. Nous trouverons votre fournisseur d''identité automatiquement.
-auth.login.password = Mot de passe d''application
-auth.login.passwordHelp = Utilisez un mot de passe d''application de votre fournisseur d''identité. En savoir plus sur les mots de passe d''application.
-auth.login.submit = Se connecter
-
-# Auth - App Password Help
-auth.appPasswordHelp.title = Qu''est-ce qu''un mot de passe d''application ?
-auth.appPasswordHelp.heading = Qu''est-ce qu''un mot de passe d''application ?
-auth.appPasswordHelp.p1 = Un mot de passe d''application est un mot de passe unique généré par votre fournisseur d''identité AT Protocol (comme Bluesky, un PDS auto-hébergé ou d''autres services compatibles) spécifiquement pour les applications tierces comme celle-ci. Il vous permet d''accorder l''accès à une application sans partager votre mot de passe principal.
-auth.appPasswordHelp.p2 = L''utilisation de mots de passe d''application renforce votre sécurité en :
-auth.appPasswordHelp.li1 = Vous permettant de révoquer l''accès pour une seule application sans changer votre mot de passe principal.
-auth.appPasswordHelp.li2 = Limitant les permissions qu''une application peut avoir (bien que pour cette application, elle accorde un accès complet à la session).
-auth.appPasswordHelp.generateSteps = Comment générer un mot de passe d''application :
-auth.appPasswordHelp.step1 = Ouvrez les paramètres de votre client AT Protocol ou de votre fournisseur d''identité (ex. : bsky.app pour Bluesky, ou le panneau d''administration de votre PDS auto-hébergé).
-auth.appPasswordHelp.step2 = Accédez à Paramètres > Mots de passe d''application (ou Confidentialité et sécurité > Mots de passe d''application).
-auth.appPasswordHelp.step3 = Cliquez sur Ajouter un mot de passe d''application.
-auth.appPasswordHelp.step4 = Donnez-lui un nom descriptif (ex. : « Appli DecodingUs »).
-auth.appPasswordHelp.step5 = Copiez le mot de passe généré. C''est la seule fois que vous le verrez.
-auth.appPasswordHelp.note = Ce mot de passe généré est celui que vous utiliserez dans le formulaire de connexion. Traitez-le comme un mot de passe ordinaire.
-
-# Profile
-profile.title = Profil utilisateur
-profile.heading = Votre profil
-profile.accountDetails = Détails du compte
-profile.handle = Identifiant
-profile.did = DID
-profile.editProfile = Modifier le profil
-profile.displayName = Nom d''affichage
-profile.displayNameHelp = Ce nom sera visible par les autres utilisateurs dans le système de messagerie.
-profile.save = Enregistrer le profil
-
-# Variant Browser
-nav.variants = Navigateur de variants
-variants.browser.title = Navigateur de variants
-variants.browser.heading = Navigateur de variants
-variants.browser.description = Recherchez dans la base de données des variants Y-ADN et ADN-mt. Trouvez des SNP par nom, rsID ou alias.
-variants.browser.variants = Variants
-variants.browser.searchPlaceholder = Rechercher par rsID, nom de SNP ou alias (ex. : M269, rs9786076)...
-variants.browser.searchHelp = Entrez au moins 2 caractères pour rechercher. Recherche dans les noms, rsID et alias de variants.
-variants.browser.selectVariant = Sélectionnez un variant pour voir les détails
-variants.browser.foundMatching = {0} groupes de variants trouvés pour « {1} »
-variants.browser.showingTotal = Affichage de {0} groupes de variants au total
-variants.browser.pageOf = Page {0} sur {1}
-variants.browser.noResults = Aucun variant trouvé pour « {0} ».
-variants.browser.enterSearch = Entrez un terme de recherche pour trouver des variants.
-variants.browser.col.name = Nom / rsID
-variants.browser.col.alleles = Anc/Dér
-variants.browser.col.type = Type
-variants.browser.col.builds = Builds
-variants.browser.strandDiffers = Le brin diffère entre les builds
-
-# Variant Detail Panel
-variants.detail.rsId = rsID
-variants.detail.commonName = Nom commun
-variants.detail.ancestral = Ancestral
-variants.detail.derived = Dérivé
-variants.detail.type = Type
-variants.detail.status = Statut de dénomination
-variants.detail.altNames = Noms alternatifs
-variants.detail.source = Source
-variants.detail.refBuilds = Builds de référence
-variants.detail.strandDiff = Différence de brin (complément inverse)
-variants.detail.build = Build
-variants.detail.position = Position
-variants.detail.alleles = Allèles
-variants.detail.motifRepeats = Motif / Répétitions
-variants.detail.usedBy = Utilisé par les haplogroupes
-variants.detail.noHaplogroups = Ce variant n''est associé à aucun haplogroupe.
-variants.detail.more = plus
-variants.detail.aliasType.snpNames = Noms de SNP
-variants.detail.aliasType.dbsnp = Identifiants dbSNP
-
-# Language
-lang.switch = Langue
-lang.en = English
-lang.fr = Français
-lang.es = Español
diff --git a/conf/routes b/conf/routes
deleted file mode 100644
index 11a99f4d..00000000
--- a/conf/routes
+++ /dev/null
@@ -1,386 +0,0 @@
-# Routes
-# This file defines all application routes (Higher priority routes first)
-# https://www.playframework.com/documentation/latest/ScalaRouting
-# ~~~~
-
-# Health check for load balancers and container orchestration
-GET /health controllers.HomeController.health()
-
-GET / controllers.HomeController.index()
-GET /cookie-usage controllers.HomeController.cookieUsage()
-GET /terms controllers.HomeController.terms()
-GET /privacy controllers.HomeController.privacy()
-GET /faq controllers.HomeController.faq()
-GET /reputation controllers.HomeController.reputation()
-GET /how-to-submit-tree-data controllers.HomeController.howToSubmitTreeData()
-
-GET /language/:lang controllers.LanguageController.switchLanguage(lang: String)
-
-GET /sitemap.xml controllers.HomeController.sitemap()
-GET /robots.txt controllers.HomeController.robots()
-
-GET /ytree controllers.TreeController.ytree(rootHaplogroup: Option[String])
-GET /mtree controllers.TreeController.mtree(rootHaplogroup: Option[String])
-GET /ytree/fragment controllers.TreeController.yTreeFragment(rootHaplogroup: Option[String])
-GET /mtree/fragment controllers.TreeController.mTreeFragment(rootHaplogroup: Option[String])
-
-GET /haplogroups/snp-sidebar/placeholder controllers.TreeController.emptySnpDetailSidebarPlaceholder
-GET /haplogroups/snp-sidebar/:haplogroupName controllers.TreeController.getSnpDetailSidebar(haplogroupName: String, haplogroupType: models.HaplogroupType)
-
-
-GET /references controllers.PublicationController.index()
-GET /references/details controllers.PublicationController.getAllPublicationsWithDetailsHtml(page: Option[Int], pageSize: Option[Int], query: Option[String])
-GET /references/details/:publicationId/biosamples controllers.BiosampleReportController.getBiosampleReportHTML(publicationId: Int, page: Option[Int] ?= Some(1))
-
-GET /publications/submit controllers.PublicationController.showSubmissionForm()
-POST /publications/submit controllers.PublicationController.submitPaper()
-
-GET /coverage-benchmarks controllers.CoverageController.index()
-
-# Variant Browser (public read-only)
-GET /variants controllers.VariantBrowserController.index(query: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 25)
-GET /variants/list controllers.VariantBrowserController.listFragment(query: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 25)
-GET /variants/detail/:id controllers.VariantBrowserController.detailPanel(id: Int)
-
-# Contact form routes
-GET /contact controllers.ContactController.show()
-POST /contact controllers.ContactController.submit()
-GET /my-messages controllers.ContactController.myMessages()
-GET /my-messages/badge controllers.ContactController.userMessageBadge()
-
-# Support Admin (requires Admin role)
-GET /admin/messages controllers.SupportAdminController.listMessages(status: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 20)
-GET /admin/messages/badge controllers.SupportAdminController.adminMessageBadge()
-GET /admin/messages/:id controllers.SupportAdminController.viewMessage(id: java.util.UUID)
-POST /admin/messages/:id/reply controllers.SupportAdminController.submitReply(id: java.util.UUID)
-GET /admin/messages/:id/status/:status controllers.SupportAdminController.updateStatus(id: java.util.UUID, status: String)
-
-GET /api/v1/y-tree controllers.TreeController.apiYTree(rootHaplogroup: Option[String])
-GET /api/v1/mt-tree controllers.TreeController.apiMTree(rootHaplogroup: Option[String])
-
-# Coverage API
-GET /api/v1/coverage/benchmarks controllers.CoverageController.apiBenchmarks()
-GET /coverage/labs controllers.CoverageController.labs
-GET /coverage/benchmarks/lab/:labId controllers.CoverageController.benchmarksByLabWithDetails(labId: Int)
-
-# Sequencer API
-GET /api/v1/sequencer/lab controllers.SequencerController.getLabByInstrumentId(instrument_id: String)
-GET /api/v1/sequencer/lab-instruments controllers.SequencerController.getAllLabInstruments
-POST /api/v1/sequencer/lab/associate controllers.SequencerController.associateLabWithInstrument()
-
-# Instrument Proposal Curator API
-GET /api/v1/curator/instrument-proposals controllers.InstrumentProposalController.listProposals(status: Option[String])
-GET /api/v1/curator/instrument-proposals/conflicts controllers.InstrumentProposalController.detectConflicts()
-GET /api/v1/curator/instrument-proposals/:id controllers.InstrumentProposalController.getProposalDetail(id: Int)
-POST /api/v1/curator/instrument-proposals/:id/accept controllers.InstrumentProposalController.acceptProposal(id: Int)
-POST /api/v1/curator/instrument-proposals/:id/reject controllers.InstrumentProposalController.rejectProposal(id: Int)
-
-GET /api/v1/references/details controllers.PublicationController.getAllPublicationsWithDetailsJson()
-GET /api/v1/references/details/:publicationId/biosamples controllers.BiosampleReportController.getBiosampleReportJSON(publicationId: Int)
-
-GET /biosamples/map controllers.BiosampleMapController.mapView()
-GET /biosamples/geo-data controllers.BiosampleMapController.geoData()
-
-# Sample studies endpoint
-GET /api/v1/biosample/studies controllers.BiosampleController.getSamplesWithStudies()
-
-# Biosample API endpoint
-PATCH /api/private/biosamples/:id controllers.BiosampleController.updateBiosample(id: Int)
-GET /api/private/biosamples/search controllers.BiosampleController.findByAliasOrAccession(query: String)
-
-POST /api/private/biosamples/publication-link controllers.BiosamplePublicationController.linkBiosampleToPublication
-
-# Biosample Original Haplogroup endpoints
-PUT /api/private/biosamples/:biosampleId/publications/:publicationId/haplogroup controllers.BiosampleOriginalHaplogroupController.updateOrCreateHaplogroup(biosampleId: Int, publicationId: Int)
-GET /api/private/biosamples/:biosampleId/publications/:publicationId/haplogroup controllers.BiosampleOriginalHaplogroupController.getHaplogroup(biosampleId: Int, publicationId: Int)
-DELETE /api/private/biosamples/:biosampleId/publications/:publicationId/haplogroup controllers.BiosampleOriginalHaplogroupController.deleteHaplogroup(biosampleId: Int, publicationId: Int)
-
-
-# PGP Biosample API endpoints
-POST /api/private/pgp/biosamples controllers.PgpBiosampleController.create
-POST /api/private/pgp/biosamples/:sampleGuid/sequences controllers.BiosampleDataController.addSequenceData(sampleGuid: java.util.UUID)
-POST /api/private/pgp/biosamples/:sampleGuid/publication controllers.BiosampleDataController.linkPublication(sampleGuid: java.util.UUID)
-
-# External Biosample API endpoints
-POST /api/private/external/biosamples controllers.ExternalBiosampleController.create
-POST /api/private/external/biosamples/:sampleGuid/sequences controllers.BiosampleDataController.addSequenceData(sampleGuid: java.util.UUID)
-POST /api/private/external/biosamples/:sampleGuid/publication controllers.BiosampleDataController.linkPublication(sampleGuid: java.util.UUID)
-
-# Specimen Donor endpoints
-POST /api/private/donors/merge controllers.SpecimenDonorController.mergeDonors()
-
-# Sequencing Lab private admin CRUD (not exposed in Swagger)
-GET /api/private/sequencing-labs controllers.SequencingLabAdminController.list
-GET /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.get(id: Int)
-POST /api/private/sequencing-labs controllers.SequencingLabAdminController.create
-PATCH /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.update(id: Int)
-DELETE /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.delete(id: Int)
-
-# Atmosphere Firehose
-POST /api/firehose/event controllers.FirehoseController.processEvent
-
-# Publication Discovery
-POST /api/private/publication-discovery/run controllers.PublicationDiscoveryController.triggerDiscovery()
-
-# Genomics Admin
-GET /admin/genomics controllers.GenomicsAdminController.dashboard()
-POST /api/private/genomics/ybrowse/update controllers.GenomicsAdminController.triggerYBrowseUpdate()
-POST /api/private/genomics/hipstr/update controllers.GenomicsAdminController.triggerHipStrUpdate()
-
-# Private Variant API
-POST /api/private/variants/builds controllers.VariantApiController.bulkAddBuilds()
-POST /api/private/variants/rsids controllers.VariantApiController.bulkUpdateRsIds()
-
-# Private Alias Source Management API
-GET /api/private/aliases/sources/stats controllers.VariantApiController.getAliasSourceStats()
-GET /api/private/aliases/sources/preview controllers.VariantApiController.previewAliasSourceUpdate(aliasPrefix: String, currentSource: String)
-POST /api/private/aliases/sources controllers.VariantApiController.bulkUpdateAliasSources()
-
-# DU Naming Authority API (X-API-Key secured)
-POST /api/private/variants/:variantId/du-name controllers.VariantApiController.assignDuName(variantId: Int)
-POST /api/private/variants/du-names controllers.VariantApiController.bulkAssignDuNames()
-GET /api/private/variants/du-names/next controllers.VariantApiController.previewNextDuName()
-
-# Authentication
-GET /login controllers.AuthController.login
-POST /login controllers.AuthController.authenticate
-POST /logout controllers.AuthController.logout
-GET /app-password-help controllers.AuthController.showAppPasswordHelp()
-
-# Cookie Consent (GDPR)
-GET /cookies/check controllers.CookieConsentController.checkConsent
-POST /cookies/accept controllers.CookieConsentController.acceptConsent
-
-# User Profile
-GET /profile controllers.ProfileController.view
-POST /profile controllers.ProfileController.update
-
-# Public Variant API
-GET /api/v1/variants controllers.VariantPublicApiController.searchVariants(query: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 25)
-GET /api/v1/variants/export controllers.VariantPublicApiController.downloadExport()
-GET /api/v1/variants/export/metadata controllers.VariantPublicApiController.exportMetadata()
-GET /api/v1/variants/:variantId controllers.VariantPublicApiController.getVariantById(variantId: Int)
-GET /api/v1/haplogroups/:haplogroupName/variants controllers.VariantPublicApiController.getVariantsByHaplogroup(haplogroupName: String)
-
-# Private Variant Export API (X-API-Key secured)
-POST /api/private/variants/export/generate controllers.VariantPublicApiController.triggerExport()
-
-# Genome Regions API
-GET /api/v1/genome-regions controllers.GenomeRegionsApiController.listBuilds()
-GET /api/v1/genome-regions/:build controllers.GenomeRegionsApiController.getRegions(build: String)
-
-# =============================================
-# Genome Regions API Management (X-API-Key secured)
-# =============================================
-# Genome Regions
-GET /api/v1/manage/genome-regions controllers.GenomeRegionsApiManagementController.listRegions(regionType: Option[String], build: Option[String], page: Int ?= 1, pageSize: Int ?= 25)
-GET /api/v1/manage/genome-regions/:id controllers.GenomeRegionsApiManagementController.getRegion(id: Int)
-POST /api/v1/manage/genome-regions controllers.GenomeRegionsApiManagementController.createRegion()
-PUT /api/v1/manage/genome-regions/:id controllers.GenomeRegionsApiManagementController.updateRegion(id: Int)
-DELETE /api/v1/manage/genome-regions/:id controllers.GenomeRegionsApiManagementController.deleteRegion(id: Int)
-POST /api/v1/manage/genome-regions/bulk controllers.GenomeRegionsApiManagementController.bulkCreateRegions()
-POST /api/v1/manage/genome-regions/bootstrap controllers.GenomicsAdminController.triggerRegionsBootstrap()
-
-# =============================================
-# Haplogroup Tree Merge API (X-API-Key secured)
-# =============================================
-POST /api/v1/manage/haplogroups/merge controllers.HaplogroupTreeMergeController.mergeFullTree()
-POST /api/v1/manage/haplogroups/merge/subtree controllers.HaplogroupTreeMergeController.mergeSubtree()
-POST /api/v1/manage/haplogroups/merge/preview controllers.HaplogroupTreeMergeController.previewMerge()
-
-# =============================================
-# Tree Versioning API (X-API-Key secured)
-# =============================================
-# Change Set Management
-GET /api/v1/manage/change-sets controllers.TreeVersioningApiController.listChangeSets(haplogroupType: Option[String], status: Option[String], page: Int ?= 1, pageSize: Int ?= 20)
-GET /api/v1/manage/change-sets/:id controllers.TreeVersioningApiController.getChangeSetDetails(id: Int)
-POST /api/v1/manage/change-sets/:id/start-review controllers.TreeVersioningApiController.startReview(id: Int)
-POST /api/v1/manage/change-sets/:id/apply controllers.TreeVersioningApiController.applyChangeSet(id: Int)
-POST /api/v1/manage/change-sets/:id/discard controllers.TreeVersioningApiController.discardChangeSet(id: Int)
-
-# Change Review
-GET /api/v1/manage/change-sets/:id/changes/pending controllers.TreeVersioningApiController.getPendingChanges(id: Int, limit: Int ?= 50)
-POST /api/v1/manage/change-sets/:changeSetId/changes/:changeId/review controllers.TreeVersioningApiController.reviewChange(changeSetId: Int, changeId: Int)
-POST /api/v1/manage/change-sets/:id/approve-all controllers.TreeVersioningApiController.approveAllPending(id: Int)
-
-# Change Set Comments
-POST /api/v1/manage/change-sets/:id/comments controllers.TreeVersioningApiController.addComment(id: Int)
-GET /api/v1/manage/change-sets/:id/comments controllers.TreeVersioningApiController.listComments(id: Int)
-
-# Tree Diff
-GET /api/v1/manage/change-sets/:id/diff controllers.TreeVersioningApiController.getTreeDiff(id: Int)
-GET /api/v1/manage/tree-diff/:haplogroupType controllers.TreeVersioningApiController.getActiveTreeDiff(haplogroupType: String)
-
-# Discovery Proposals API
-GET /api/v1/discovery/proposals controllers.DiscoveryApiController.listProposals(type: Option[String], status: Option[String])
-GET /api/v1/discovery/proposals/:id controllers.DiscoveryApiController.getProposalDetails(id: Int)
-POST /api/v1/discovery/proposals/:id/start-review controllers.DiscoveryApiController.startReview(id: Int)
-POST /api/v1/discovery/proposals/:id/accept controllers.DiscoveryApiController.acceptProposal(id: Int)
-POST /api/v1/discovery/proposals/:id/reject controllers.DiscoveryApiController.rejectProposal(id: Int)
-POST /api/v1/discovery/proposals/:id/promote controllers.DiscoveryApiController.promoteProposal(id: Int)
-GET /api/v1/discovery/proposals/:id/audit controllers.DiscoveryApiController.getAuditTrail(id: Int)
-
-# Curator Workflow
-GET /admin/publication-candidates controllers.PublicationCandidateController.listCandidates(page: Int ?= 1, pageSize: Int ?= 20, status: String ?= "pending")
-POST /admin/publication-candidates/:id/accept controllers.PublicationCandidateController.accept(id: Int)
-POST /admin/publication-candidates/:id/reject controllers.PublicationCandidateController.reject(id: Int)
-POST /admin/publication-candidates/:id/defer controllers.PublicationCandidateController.defer(id: Int)
-POST /admin/publication-candidates/bulk controllers.PublicationCandidateController.bulkAction()
-
-# Curator Tools (requires TreeCurator or Admin role)
-GET /curator controllers.CuratorController.dashboard
-
-# Curator - Change Sets (Tree Versioning)
-GET /curator/change-sets controllers.TreeVersioningCuratorController.listChangeSets(hgType: Option[String], status: Option[String], pageSize: Int ?= 20)
-GET /curator/change-sets/fragment controllers.TreeVersioningCuratorController.changeSetsFragment(hgType: Option[String], status: Option[String], page: Int ?= 1, pageSize: Int ?= 20)
-GET /curator/change-sets/:id/panel controllers.TreeVersioningCuratorController.changeSetDetailPanel(id: Int)
-GET /curator/change-sets/:id/changes/pending controllers.TreeVersioningCuratorController.pendingChangesFragment(id: Int, limit: Int ?= 50)
-POST /curator/change-sets/:id/start-review controllers.TreeVersioningCuratorController.startReview(id: Int)
-POST /curator/change-sets/:id/apply controllers.TreeVersioningCuratorController.applyChangeSet(id: Int)
-POST /curator/change-sets/:id/discard controllers.TreeVersioningCuratorController.discardChangeSet(id: Int)
-POST /curator/change-sets/:id/approve-all controllers.TreeVersioningCuratorController.approveAllPending(id: Int)
-POST /curator/change-sets/:changeSetId/changes/:changeId/review controllers.TreeVersioningCuratorController.reviewChange(changeSetId: Int, changeId: Int)
-GET /curator/change-sets/:id/diff controllers.TreeVersioningCuratorController.diffView(id: Int)
-GET /curator/change-sets/:id/diff/fragment controllers.TreeVersioningCuratorController.diffFragment(id: Int)
-GET /curator/change-sets/:id/tree-preview controllers.TreeVersioningCuratorController.treePreview(id: Int)
-GET /curator/change-sets/:id/ambiguity-report controllers.TreeVersioningCuratorController.ambiguityReport(id: Int)
-GET /curator/change-sets/:id/ambiguity-report/download controllers.TreeVersioningCuratorController.downloadAmbiguityReport(id: Int)
-
-# Curator - Change Set Conflict Resolutions
-GET /curator/change-sets/:id/resolutions controllers.TreeVersioningCuratorController.listResolutions(id: Int)
-GET /curator/change-sets/:id/deferred controllers.TreeVersioningCuratorController.listDeferredItems(id: Int)
-POST /curator/change-sets/:id/resolve/reparent controllers.TreeVersioningCuratorController.resolveReparent(id: Int)
-POST /curator/change-sets/:id/resolve/edit-variants controllers.TreeVersioningCuratorController.resolveEditVariants(id: Int)
-POST /curator/change-sets/:id/resolve/merge-existing controllers.TreeVersioningCuratorController.resolveMergeExisting(id: Int)
-POST /curator/change-sets/:id/resolve/defer controllers.TreeVersioningCuratorController.resolveDefer(id: Int)
-DELETE /curator/change-sets/:changeSetId/resolutions/:resolutionId controllers.TreeVersioningCuratorController.cancelResolution(changeSetId: Int, resolutionId: Int)
-
-# Curator - Haplogroups
-GET /curator/haplogroups controllers.CuratorController.listHaplogroups(query: Option[String], hgType: Option[String], page: Int ?= 1, pageSize: Int ?= 20)
-GET /curator/haplogroups/fragment controllers.CuratorController.haplogroupsFragment(query: Option[String], hgType: Option[String], page: Int ?= 1, pageSize: Int ?= 20)
-GET /curator/haplogroups/search.json controllers.CuratorController.searchHaplogroupsJson(query: Option[String], hgType: Option[String])
-GET /curator/haplogroups/new controllers.CuratorController.createHaplogroupForm
-POST /curator/haplogroups controllers.CuratorController.createHaplogroup
-GET /curator/haplogroups/:id/panel controllers.CuratorController.haplogroupDetailPanel(id: Int)
-GET /curator/haplogroups/:id/edit controllers.CuratorController.editHaplogroupForm(id: Int)
-POST /curator/haplogroups/:id controllers.CuratorController.updateHaplogroup(id: Int)
-DELETE /curator/haplogroups/:id controllers.CuratorController.deleteHaplogroup(id: Int)
-
-# Curator - Haplogroup Tree Restructuring
-GET /curator/haplogroups/:id/split controllers.CuratorController.splitBranchForm(id: Int)
-POST /curator/haplogroups/:id/split controllers.CuratorController.splitBranch(id: Int)
-GET /curator/haplogroups/:id/merge controllers.CuratorController.mergeConfirmForm(id: Int)
-POST /curator/haplogroups/:id/merge controllers.CuratorController.mergeIntoParent(id: Int)
-GET /curator/haplogroups/:id/reparent controllers.CuratorController.reparentForm(id: Int)
-POST /curator/haplogroups/:id/reparent controllers.CuratorController.reparent(id: Int)
-
-# Curator - Haplogroup-Variant Associations
-GET /curator/haplogroups/:id/variants/search controllers.CuratorController.searchVariantsForHaplogroup(id: Int, q: Option[String])
-POST /curator/haplogroups/:hgId/variants/:variantId controllers.CuratorController.addVariantToHaplogroup(hgId: Int, variantId: Int)
-DELETE /curator/haplogroups/:hgId/variants/:variantId controllers.CuratorController.removeVariantFromHaplogroup(hgId: Int, variantId: Int)
-GET /curator/haplogroup-variants/:hvId/history controllers.CuratorController.haplogroupVariantHistory(hvId: Int)
-
-# Curator - Variants
-GET /curator/variants controllers.CuratorController.listVariants(query: Option[String], page: Int ?= 1, pageSize: Int ?= 20)
-GET /curator/variants/fragment controllers.CuratorController.variantsFragment(query: Option[String], page: Int ?= 1, pageSize: Int ?= 20)
-GET /curator/variants/new controllers.CuratorController.createVariantForm
-POST /curator/variants controllers.CuratorController.createVariant
-GET /curator/variants/:id/panel controllers.CuratorController.variantDetailPanel(id: Int)
-GET /curator/variants/:id/edit controllers.CuratorController.editVariantForm(id: Int)
-POST /curator/variants/:id controllers.CuratorController.updateVariant(id: Int)
-DELETE /curator/variants/:id controllers.CuratorController.deleteVariant(id: Int)
-# Variant group editing removed - now using VariantV2 with consolidated coordinates
-
-# Curator - Audit
-GET /curator/audit/:entityType/:entityId controllers.CuratorController.auditHistory(entityType: String, entityId: Int)
-
-# =============================================
-# Curator - Genome Regions (session auth + permissions)
-# =============================================
-# Genome Regions
-GET /curator/genome-regions controllers.GenomeRegionsCuratorController.listRegions(regionType: Option[String], build: Option[String], page: Int ?= 1, pageSize: Int ?= 25)
-GET /curator/genome-regions/fragment controllers.GenomeRegionsCuratorController.regionsFragment(regionType: Option[String], build: Option[String], page: Int ?= 1, pageSize: Int ?= 25)
-GET /curator/genome-regions/:id/detail controllers.GenomeRegionsCuratorController.regionDetailPanel(id: Int)
-GET /curator/genome-regions/new controllers.GenomeRegionsCuratorController.createRegionForm
-POST /curator/genome-regions controllers.GenomeRegionsCuratorController.createRegion
-GET /curator/genome-regions/:id/edit controllers.GenomeRegionsCuratorController.editRegionForm(id: Int)
-POST /curator/genome-regions/:id controllers.GenomeRegionsCuratorController.updateRegion(id: Int)
-DELETE /curator/genome-regions/:id controllers.GenomeRegionsCuratorController.deleteRegion(id: Int)
-
-# =============================================
-# PDS Fleet Management API
-# =============================================
-# PDS-authenticated endpoints (signed by edge node private key)
-+nocsrf
-POST /api/v1/pds/heartbeat controllers.PdsFleetApiController.heartbeat()
-+nocsrf
-POST /api/v1/pds/submissions controllers.PdsFleetApiController.submitData()
-
-# Admin fleet management (X-API-Key secured)
-GET /api/v1/pds/fleet/summary controllers.PdsFleetApiController.getFleetSummary
-GET /api/v1/pds/fleet/nodes controllers.PdsFleetApiController.listNodes(status: Option[String])
-GET /api/v1/pds/fleet/nodes/:did controllers.PdsFleetApiController.getNode(did: String)
-DELETE /api/v1/pds/fleet/nodes/:did controllers.PdsFleetApiController.removeNode(did: String)
-POST /api/v1/pds/fleet/mark-stale controllers.PdsFleetApiController.markStaleOffline()
-GET /api/v1/pds/submissions/pending controllers.PdsFleetApiController.getPendingSubmissions(type: Option[String], limit: Int ?= 100)
-POST /api/v1/pds/submissions/:id/accept controllers.PdsFleetApiController.acceptSubmission(id: Int)
-POST /api/v1/pds/submissions/:id/reject controllers.PdsFleetApiController.rejectSubmission(id: Int)
-GET /api/v1/pds/submissions/summary/:did controllers.PdsFleetApiController.getNodeSubmissionSummary(did: String)
-
-# =============================================
-# Patronage API (X-API-Key secured)
-# =============================================
-POST /api/v1/patronage/subscriptions controllers.PatronageApiController.createSubscription()
-POST /api/v1/patronage/subscriptions/:id/cancel controllers.PatronageApiController.cancelSubscription(id: Int)
-GET /api/v1/patronage/subscriptions/user/:userId controllers.PatronageApiController.getUserSubscriptions(userId: java.util.UUID)
-GET /api/v1/patronage/subscriptions/active/:userId controllers.PatronageApiController.getSubscription(userId: java.util.UUID)
-GET /api/v1/patronage/is-patron/:userId controllers.PatronageApiController.isPatron(userId: java.util.UUID)
-GET /api/v1/patronage/summary controllers.PatronageApiController.getPatronSummary
-POST /api/v1/patronage/expire-overdue controllers.PatronageApiController.expireOverdue()
-
-# =============================================
-# IBD Match Discovery API (PDS-authenticated)
-# =============================================
-+nocsrf
-GET /api/v1/discovery/suggestions controllers.MatchDiscoveryController.getSuggestions(type: Option[String], limit: Int ?= 20)
-+nocsrf
-POST /api/v1/discovery/suggestions/:id/dismiss controllers.MatchDiscoveryController.dismissSuggestion(id: Long)
-+nocsrf
-GET /api/v1/discovery/population/:sampleGuid controllers.MatchDiscoveryController.getPopulationBreakdown(sampleGuid: java.util.UUID)
-+nocsrf
-GET /api/v1/discovery/population/overlap/:guid1/:guid2 controllers.MatchDiscoveryController.getPopulationOverlap(guid1: java.util.UUID, guid2: java.util.UUID)
-
-# =============================================
-# IBD Match Request & Consent API (PDS-authenticated)
-# =============================================
-+nocsrf
-POST /api/v1/matches/request controllers.MatchRequestController.createRequest()
-+nocsrf
-GET /api/v1/matches/requests/pending controllers.MatchRequestController.getPendingRequests()
-+nocsrf
-GET /api/v1/matches/requests/sent controllers.MatchRequestController.getSentRequests()
-+nocsrf
-POST /api/v1/matches/requests/:uri/cancel controllers.MatchRequestController.cancelRequest(uri: String)
-+nocsrf
-POST /api/v1/matches/consent controllers.MatchRequestController.submitConsent()
-+nocsrf
-GET /api/v1/matches/consent/status/:requestUri controllers.MatchRequestController.getConsentStatus(requestUri: String)
-
-# =============================================
-# IBD Relay API (PDS-authenticated)
-# =============================================
-+nocsrf
-POST /api/v1/ibd/relay/session controllers.IbdRelayController.createSession()
-+nocsrf
-GET /api/v1/ibd/relay/session/:sessionId controllers.IbdRelayController.getSessionStatus(sessionId: String)
-GET /api/v1/ibd/relay/:sessionId controllers.IbdRelayController.relay(sessionId: String)
-
-# --- API Routes (Handled by Tapir, including Swagger UI) ---
-POST /api/registerPDS controllers.PDSRegistrationController.registerPDS()
-
-# Delegate all requests starting with /api to the Tapir-based ApiRouter
--> /api controllers.ApiRouter
-
-# Map static resources from the /public folder to the /assets URL path
-GET /assets/*file controllers.Assets.versioned(path="/public", file: Asset)
--> /webjars webjars.Routes
\ No newline at end of file
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
deleted file mode 100644
index 72969df6..00000000
--- a/docker-compose.prod.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-# =============================================================================
-# DecodingUs Production Overrides
-# =============================================================================
-# Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
-# =============================================================================
-
-services:
- app:
- image: ${DOCKER_REGISTRY:-}decodingus:${IMAGE_TAG:-latest}
- build:
- context: .
- environment:
- # Production database (use RDS or external PostgreSQL)
- - SLICK_DBS_DEFAULT_DB_URL=${DATABASE_URL}
- - SLICK_DBS_DEFAULT_DB_USER=${DATABASE_USER}
- - SLICK_DBS_DEFAULT_DB_PASSWORD=${DATABASE_PASSWORD}
- - SLICK_DBS_METADATA_DB_URL=${METADATA_DATABASE_URL:-${DATABASE_URL}}
- - SLICK_DBS_METADATA_DB_USER=${DATABASE_USER}
- - SLICK_DBS_METADATA_DB_PASSWORD=${DATABASE_PASSWORD}
- # Security
- - APPLICATION_SECRET=${APPLICATION_SECRET}
- - PLAY_HTTP_SECRET_KEY=${APPLICATION_SECRET}
- # Production settings
- - PLAY_EVOLUTIONS_AUTOCOMMIT=false
- - ENABLE_RECAPTCHA=${ENABLE_RECAPTCHA:-true}
- - RECAPTCHA_SECRET_KEY=${RECAPTCHA_SECRET_KEY}
- - RECAPTCHA_SITE_KEY=${RECAPTCHA_SITE_KEY}
- # Contact
- - CONTACT_RECIPIENT_EMAIL=${CONTACT_RECIPIENT_EMAIL}
- ports:
- - "127.0.0.1:9000:9000" # Only expose to localhost (nginx will proxy)
- restart: always
- logging:
- driver: "json-file"
- options:
- max-size: "10m"
- max-file: "5"
-
- # In production, typically use RDS instead of containerized PostgreSQL
- # Comment out or remove the db service when using external database
- db:
- profiles:
- - with-db # Only start if explicitly requested: docker compose --profile with-db up
diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index 5310260c..00000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,72 +0,0 @@
-# =============================================================================
-# DecodingUs Docker Compose Configuration
-# =============================================================================
-# Development: docker compose up
-# Production: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
-# =============================================================================
-
-services:
- # ---------------------------------------------------------------------------
- # Application
- # ---------------------------------------------------------------------------
- app:
- build:
- context: .
- dockerfile: Dockerfile
- container_name: decodingus-app
- ports:
- - "9000:9000"
- environment:
- # Database connection (override in production)
- - SLICK_DBS_DEFAULT_DB_URL=jdbc:postgresql://db:5432/decodingus_db
- - SLICK_DBS_DEFAULT_DB_USER=decodingus_user
- - SLICK_DBS_DEFAULT_DB_PASSWORD=decodingus_password
- - SLICK_DBS_METADATA_DB_URL=jdbc:postgresql://db:5432/decodingus_metadata
- - SLICK_DBS_METADATA_DB_USER=decodingus_user
- - SLICK_DBS_METADATA_DB_PASSWORD=decodingus_password
- # Play configuration
- - APPLICATION_SECRET=${APPLICATION_SECRET:-changeme}
- - PLAY_HTTP_SECRET_KEY=${APPLICATION_SECRET:-changeme}
- depends_on:
- db:
- condition: service_healthy
- networks:
- - decodingus-network
- restart: unless-stopped
- healthcheck:
- test: ["CMD", "curl", "-f", "http://localhost:9000/health"]
- interval: 30s
- timeout: 10s
- retries: 3
- start_period: 60s
-
- # ---------------------------------------------------------------------------
- # PostgreSQL Database
- # ---------------------------------------------------------------------------
- db:
- image: postgis/postgis:16-3.4-alpine
- container_name: decodingus-db
- environment:
- - POSTGRES_USER=decodingus_user
- - POSTGRES_PASSWORD=decodingus_password
- - POSTGRES_DB=decodingus_db
- volumes:
- - postgres_data:/var/lib/postgresql/data
- - ./docker/init-db.sql:/docker-entrypoint-initdb.d/init-db.sql:ro
- ports:
- - "5432:5432"
- networks:
- - decodingus-network
- restart: unless-stopped
- healthcheck:
- test: ["CMD-SHELL", "pg_isready -U decodingus_user -d decodingus_db"]
- interval: 10s
- timeout: 5s
- retries: 5
-
-networks:
- decodingus-network:
- driver: bridge
-
-volumes:
- postgres_data:
diff --git a/docker/init-db.sql b/docker/init-db.sql
deleted file mode 100644
index 85137291..00000000
--- a/docker/init-db.sql
+++ /dev/null
@@ -1,28 +0,0 @@
--- =============================================================================
--- DecodingUs Database Initialization
--- =============================================================================
--- This script runs once when the PostgreSQL container is first created.
--- It sets up the required databases and extensions.
--- =============================================================================
-
--- Create the metadata database (main database is created by POSTGRES_DB env var)
-CREATE DATABASE decodingus_metadata;
-
--- Connect to main database and enable extensions
-\c decodingus_db
-
--- PostGIS for geospatial data
-CREATE EXTENSION IF NOT EXISTS postgis;
-
--- LTree for hierarchical data (haplogroup trees)
-CREATE EXTENSION IF NOT EXISTS ltree;
-
--- pg_trgm for fuzzy text search
-CREATE EXTENSION IF NOT EXISTS pg_trgm;
-
--- Connect to metadata database and enable extensions
-\c decodingus_metadata
-
-CREATE EXTENSION IF NOT EXISTS postgis;
-CREATE EXTENSION IF NOT EXISTS ltree;
-CREATE EXTENSION IF NOT EXISTS pg_trgm;
diff --git a/documents/API_Improvement_Report.md b/documents/API_Improvement_Report.md
deleted file mode 100644
index bc37496d..00000000
--- a/documents/API_Improvement_Report.md
+++ /dev/null
@@ -1,60 +0,0 @@
-# API Improvement Report: Decoding Us as an App Layer for Edge Nodes
-
-This report outlines the strategy for evolving the Decoding Us API. It distinguishes between the existing **Research/Curator APIs** (used for academic data maintenance) and the required **Edge/Atmosphere APIs** (needed for the distributed citizen science network).
-
-## 1. Current State: The Research/Curator API
-
-The application currently contains a set of "Private APIs" (implemented as standard Play Controllers) primarily used for:
-* **Academic Data Integration:** Importing and managing data from research papers.
-* **Public Repository Sync:** Maintaining reference data from sources like the 1000 Genomes Project (1KG).
-* **Biosample Management:** Creation of samples by trusted administrators or researchers.
-
-**Status:**
-* **Authentication:** Relies on `ApiKeyFilter`. This is generally **adequate** for this specific use case, as these endpoints are intended for a limited set of trusted internal users or automated service accounts managing reference data.
-* **Definition:** Implemented purely as Play Actions. While functional, the lack of Tapir definitions makes it harder to generate client SDKs for internal tools.
-
-## 2. Future State: The Edge/Atmosphere API
-
-To function as an "App Layer in the Atmosphere" for thousands of distributed Personal Data Servers (PDS), a distinct set of APIs is required. These APIs operate in a fundamentally different trust domain (untrusted/semi-trusted distributed nodes) compared to the Curator APIs.
-
-### Critical Improvements Required
-
-#### A. Formalize Edge-Facing APIs (Tapir)
-* **Goal:** Define the interface for the distributed Edge application.
-* **Action:** Create new Tapir definitions for all Edge interactions. This allows the Edge application (likely written in a different language/context) to use auto-generated clients, ensuring robust communication.
-* **Scope:**
- * `EdgeOpsEndpoints.scala`: Heartbeats, Config, Updates.
- * `IngestionEndpoints.scala`: Submission of results (calls, segments).
-
-#### B. Implement DID-Based Authentication (The "Atmosphere" Layer)
-* **Context:** The current `ApiKeyFilter` is insufficient for the Edge layer. We cannot issue and manage static secrets for thousands of citizen scientists.
-* **Action:** Implement a decentralized auth flow.
- * **Registration:** `POST /api/v1/edge/register` - Edge Node exchanges its DID and Public Key.
- * **Verification:** Middleware that verifies requests signed by the PDS's private key against the registered DID.
-* **Distinction:** This auth mechanism specifically protects the *Edge* endpoints, while the *Curator* endpoints can potentially remain on the simpler API Key system (or migrate later).
-
-#### C. Operational Management (Fleet Control)
-* **Gap:** The current system has no visibility into the "Fleet".
-* **Action:** Implement "Atmosphere" control endpoints.
- * **Heartbeat:** Nodes report `online`, `idle`, `processing`.
- * **Configuration:** Server pushes distinct configurations (e.g., "Focus analysis on Haplogroup R-M269").
- * **Manifest:** Server publishes the "Target State" for the Edge software version.
-
-#### D. Granular Data Ingestion
-* **Gap:** Current ingestion is "Sample Creation" focused. Edge nodes need to submit *results* for existing samples.
-* **Action:** Create specialized endpoints for result submission.
- * `POST /api/v1/ingest/haplogroup-call`: "I found this variant."
- * `POST /api/v1/ingest/ibd-segments`: "I found this match (hash)."
- * `POST /api/v1/ingest/stats`: "I processed 5GB of data."
-
-## Summary of Recommendations
-
-| Feature Area | Curator API (Existing) | Edge/Atmosphere API (New) |
-| :--- | :--- | :--- |
-| **Primary User** | Internal Researchers / Scripts | Citizen Scientist PDS Nodes |
-| **Trust Model** | Trusted (Internal) | Untrusted/Semi-Trusted (Distributed) |
-| **Auth Method** | API Key (Current is OK) | **DID + Request Signing (Required)** |
-| **Definition** | Play Controllers | **Tapir Endpoints (Required)** |
-| **Action** | Maintain / Doc Improvements | **Build New Layer** |
-
-**Immediate Next Step:** Begin implementing `EdgeOpsEndpoints.scala` and the DID-based authentication middleware to establish the secure channel for the new layer.
\ No newline at end of file
diff --git a/documents/BGS-Firehouse-Sync.mermaid b/documents/BGS-Firehouse-Sync.mermaid
deleted file mode 100644
index 93f297e1..00000000
--- a/documents/BGS-Firehouse-Sync.mermaid
+++ /dev/null
@@ -1,25 +0,0 @@
-sequenceDiagram
- participant AS as App Server (Play View)
- participant PS1 as Participant PDS 1
- participant PS2 as Participant PDS 2
- participant DBR as Internal DID Registry
- participant IMB as Internal Message Bus (Kafka/Akka)
-
- title Custom BGS/Firehose Sync Flow
-
- AS->>DBR: 1. Get List of Active DIDs and Sync Cursors
-
- loop Sync all DIDs in Registry
- AS->>PS1: 2a. Sync Request: com.atproto.sync.getLatestCommit(did:...)
- PS1-->>AS: 3a. Commit Response (Root CID)
-
- AS->>PS1: 4a. Fetch Blocks: com.atproto.sync.getRepo(did:..., since: Cursor)
- PS1-->>AS: 5a. Response: CAR file containing new records/diff
-
- AS->>AS: 6. Verify, Decode, Filter for "app.citizen.report"
-
- AS->>IMB: 7. Publish Event: Decoded Citizen Report
- AS->>DBR: 8. Update Cursor (New last synced sequence number)
- end
-
- AS->>AS: 9. App View Consumes IMB Topic (The Custom Firehose)
\ No newline at end of file
diff --git a/documents/BGS_Integration_Plan.md b/documents/BGS_Integration_Plan.md
deleted file mode 100644
index 52e5864e..00000000
--- a/documents/BGS_Integration_Plan.md
+++ /dev/null
@@ -1,352 +0,0 @@
-# BGS / Firehose Integration Plan
-
-## Status: Transitioning to Atmosphere Lexicon Events
-
-This document outlines the transition of the BGS integration from a monolithic REST API to a more granular, event-driven model based on the Atmosphere Lexicon. While the initial REST API (`/api/external-biosamples`, `/api/projects`) remains functional for backward compatibility, new integrations should prefer the generic `/api/firehose/event` endpoint.
-
----
-
-## Architecture Overview
-
-For the MVP and early phases, we utilize a **Secure REST API** pattern. The BGS server (or Edge App) acts as an authenticated API client.
-
-### Generic Atmosphere Event API (Recommended for New Integrations)
-
-This endpoint provides a unified entry point for all Atmosphere Lexicon records (Biosample, Sequence Run, Alignment, Project, etc.). The client sends a JSON payload representing a `FirehoseEvent` type, which is then dispatched to the appropriate handler.
-
-* **Integration Point:** `POST /api/firehose/event`
-* **Controller:** `app/controllers/CitizenBiosampleController.scala` (specifically `processEvent` action)
-* **Handler:** `app/services/firehose/AtmosphereEventHandler.scala`
-* **Data Models:** `app/models/atmosphere/*Record.scala` and `app/services/firehose/*Event.scala`
-* **Security:** API Key authentication via `X-API-Key` header (`ApiSecurityAction`)
-
-### Legacy (Phase 1) Monolithic APIs (For Backward Compatibility)
-
-These endpoints handle `ExternalBiosampleRequest` which is a monolithic structure that embeds all related data. This will eventually be deprecated in favor of the granular Atmosphere events.
-
-#### Citizen Biosample API
-
-* **Integration Point:** `POST /api/external-biosamples`
-* **Controller:** `app/controllers/CitizenBiosampleController.scala`
-* **Service:** `app/services/CitizenBiosampleService.scala`
-* **Data Model:** `app/models/api/ExternalBiosampleRequest.scala`
-* **Security:** API Key authentication via `X-API-Key` header (`ApiSecurityAction`)
-
-#### Full CRUD Operations (Legacy)
-
-| Operation | Endpoint | Description |
-|-----------|----------|-------------|
-| **Create** | `POST /api/external-biosamples` | Create new citizen biosample with donor resolution |
-| **Update** | `PUT /api/external-biosamples/{atUri}` | Update existing biosample (optimistic locking via `atCid`) |
-| **Delete** | `DELETE /api/external-biosamples/{atUri}` | Soft delete biosample |
-
-#### Project API (Legacy)
-
-| Operation | Endpoint | Description |
-|-----------|----------|-------------|
-| **Create** | `POST /api/projects` | Create new research project |
-| **Update** | `PUT /api/projects/{atUri}` | Update project (optimistic locking) |
-| **Delete** | `DELETE /api/projects/{atUri}` | Soft delete project |
-
----
-
-## Data Model: Atmosphere Lexicon Granular Records
-
-The Edge App now generates and sends granular records defined by the `com.decodingus.atmosphere` Lexicon. The previous monolithic `ExternalBiosampleRequest` is now broken down into distinct, inter-referenced records.
-
-### Key Concepts
-
-1. **PDS Owner (citizenDid):** The researcher/genealogist running the Edge App. Owns the AT Protocol records.
-2. **Granular Records:** Each significant entity (Biosample, Sequence Run, Alignment, Project) is its own top-level record with a unique `atUri`.
-3. **Referential Integrity:** Records link to each other using `atUri` references (e.g., `SequenceRunRecord.biosampleRef` points to a `BiosampleRecord`).
-
-### Linkage Keys
-
-* `atUri`: The canonical AT Protocol identifier (`at://did:plc:xxx/collection/rkey`) - uniquely identifies *any* record.
-* `atCid`: Content Identifier for optimistic locking / version tracking.
-* `citizenDid`: Identifies the PDS owner, extracted from `atUri` or provided explicitly.
-* `donorIdentifier`: Identifies the specific biological source (person) within that PDS owner's collection.
-
-### SpecimenDonor Resolution Logic
-
-Implemented in `CitizenBiosampleService.resolveOrCreateDonor()`:
-
-1. Extract `citizenDid` from `atUri` (format: `at://did:plc:xxx/...`)
-2. Look up `SpecimenDonor` by `(citizenDid, donorIdentifier)` pair
-3. If found: Link biosample to existing donor (aggregates multiple datasets)
-4. If not found: Create new `SpecimenDonor` with `donorType = Citizen`
-
-
-
----
-
-## Data Payload Specification (Atmosphere Lexicon Events)
-
-Clients should send JSON payloads corresponding to `FirehoseEvent` wrappers around the Atmosphere Lexicon records. The `action` field (`Create`, `Update`, `Delete`) dictates the operation.
-
-### 1. `BiosampleEvent` (for `com.decodingus.atmosphere.biosample`)
-
-**Example `Create` Payload:**
-
-```json
-{
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx",
- "atCid": "bafyreihp47vj6t24z4k3f2f5vj4b5t3g2d5c3v2h5j4k3l2m5n6o4p3q2r",
- "action": "Create",
- "payload": {
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx",
- "meta": {
- "version": 1,
- "createdAt": "2025-12-07T10:00:00Z"
- },
- "sampleAccession": "BGS-SAMPLE-001",
- "donorIdentifier": "Subject-Alice-1",
- "citizenDid": "did:plc:alice123",
- "description": "Blood sample from Alice's WGS",
- "centerName": "Home Lab BGS Node",
- "sex": "Female",
- "haplogroups": {
- "yDna": {
- "haplogroupName": "H1",
- "score": 0.99
- },
- "mtDna": {
- "haplogroupName": "K1a10",
- "score": 0.98
- }
- },
- "sequenceRunRefs": [],
- "genotypeRefs": [],
- "populationBreakdownRef": null,
- "strProfileRef": null
- }
-}
-```
-
-### 2. `SequenceRunEvent` (for `com.decodingus.atmosphere.sequencerun`)
-
-**Example `Create` Payload:**
-
-```json
-{
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.sequencerun/abc123xyz",
- "atCid": "bafyreiaabcdefghijklmnopqrstuvwxyz0123456789",
- "action": "Create",
- "payload": {
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.sequencerun/abc123xyz",
- "meta": {
- "version": 1,
- "createdAt": "2025-12-07T11:00:00Z"
- },
- "biosampleRef": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx",
- "platformName": "ILLUMINA",
- "instrumentModel": "NovaSeq 6000",
- "instrumentId": "SN0001",
- "testType": "WGS",
- "libraryLayout": "PAIRED",
- "totalReads": 850000000,
- "readLength": 150,
- "meanInsertSize": 350.5,
- "runDate": "2025-10-15T09:00:00Z",
- "files": [
- {
- "fileName": "alice_wgs.fastq.gz",
- "fileSizeBytes": 50000000000,
- "fileFormat": "FASTQ",
- "checksum": "sha256-...",
- "checksumAlgorithm": "SHA-256",
- "location": "/data/alice/alice_wgs.fastq.gz"
- }
- ],
- "alignmentRefs": []
- }
-}
-```
-
-### 3. `AlignmentEvent` (for `com.decodingus.atmosphere.alignment`)
-
-**Example `Create` Payload:**
-
-```json
-{
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.alignment/def456uvw",
- "atCid": "bafyreic1d2e3f4g5h6i7j8k9l0m1n2o3p4q5r6s7t8u9v0w",
- "action": "Create",
- "payload": {
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.alignment/def456uvw",
- "meta": {
- "version": 1,
- "createdAt": "2025-12-07T12:00:00Z"
- },
- "sequenceRunRef": "at://did:plc:alice123/com.decodingus.atmosphere.sequencerun/abc123xyz",
- "biosampleRef": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx",
- "referenceBuild": "GRCh38",
- "aligner": "BWA-MEM 0.7.17",
- "variantCaller": "GATK HaplotypeCaller 4.2",
- "files": [
- {
- "fileName": "alice_wgs.cram",
- "fileSizeBytes": 20000000000,
- "fileFormat": "CRAM",
- "checksum": "sha256-...",
- "checksumAlgorithm": "SHA-256",
- "location": "/data/alice/alice_wgs.cram"
- }
- ],
- "metrics": {
- "genomeTerritory": 3000000000,
- "meanCoverage": 35.5,
- "medianCoverage": 30.0,
- "sdCoverage": 10.2,
- "pctExcDupe": 0.05,
- "pctExcMapq": 0.01,
- "pct10x": 0.95,
- "pct20x": 0.90,
- "pct30x": 0.85,
- "hetSnpSensitivity": 0.99
- }
- }
-}
-```
-
-### 4. `AtmosphereProjectEvent` (for `com.decodingus.atmosphere.project`)
-
-**Example `Create` Payload:**
-
-```json
-{
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.project/my-family-project",
- "atCid": "bafyreidf8w9x7y6z5a4b3c2d1e0f9g8h7i6j5k4l3m2n1o0p9q8r7s6t5u4v3w2",
- "action": "Create",
- "payload": {
- "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.project/my-family-project",
- "meta": {
- "version": 1,
- "createdAt": "2025-12-07T13:00:00Z"
- },
- "projectName": "Alice's Family Tree Project",
- "description": "Research project on the maternal lineage of the Alice family.",
- "administrator": "did:plc:alice123",
- "memberRefs": [
- "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx",
- "at://did:plc:alice123/com.decodingus.atmosphere.biosample/other-sample"
- ]
- }
-}
-```
-
-### Key Fields (Atmosphere Events)
-
-| Field | Required | Description |
-|-------|----------|-------------|
-| `atUri` | Yes | AT Protocol URI - canonical identifier for the specific record |
-| `atCid` | Yes | Content Identifier for optimistic locking / version tracking |
-| `action` | Yes | Operation type (`Create`, `Update`, `Delete`) |
-| `payload` | Yes (for Create/Update) | The specific Lexicon record (e.g., `BiosampleRecord`, `SequenceRunRecord`) |
-| `payload.meta.createdAt` | Yes | Timestamp of record creation |
-| `payload.biosampleRef` | Yes (for child records) | AT URI of the parent biosample |
-| `payload.sequenceRunRef` | Yes (for Alignment) | AT URI of the parent sequence run |
-
----
-
-## PDS Registration
-
-Before syncing data, PDS instances must be registered:
-
-**Endpoint:** `POST /api/registerPDS`
-
-```json
-{
- "did": "did:plc:abc123",
- "handle": "researcher.bsky.social",
- "pdsUrl": "https://pds.example.com",
- "rToken": "auth-token-from-edge-app"
-}
-```
-
-The registration process:
-1. Verifies PDS is reachable via `com.atproto.sync.getLatestCommit`
-2. Stores DID, PDS URL, and initial sync cursor
-3. Enables the Rust sync cluster to poll for updates
-
-### PDS Lease Management
-
-For parallel sync processing, the `pds_registrations` table includes:
-- `leased_by_instance_id`: Which sync worker owns this PDS
-- `lease_expires_at`: Lease expiration for failover
-- `processing_status`: idle | processing | error
-
----
-
-## Database Schema
-
-### Tables
-
-| Table | Purpose |
-|-------|---------|
-| `citizen_biosample` | Citizen/Atmosphere biosample records |
-| `specimen_donor` | Physical persons (donors) - linked via `specimen_donor_id` FK |
-| `project` | Research projects grouping biosamples |
-| `sequence_library` | Sequence run records |
-| `sequence_file` | Sequence file metadata |
-| `alignment_metadata` | Alignment metadata and metrics |
-| `pds_registrations` | Registered PDS instances for sync |
-| `publication_citizen_biosample` | Links biosamples to publications |
-| `citizen_biosample_original_haplogroup` | Publication-reported haplogroups |
-
-### Key Columns on `citizen_biosample` (and other Atmosphere-enabled tables)
-
-| Column | Type | Purpose |
-|--------|------|---------|
-| `at_uri` | VARCHAR | AT Protocol canonical identifier |
-| `at_cid` | VARCHAR | Version for optimistic locking |
-| `specimen_donor_id` | INT FK | Link to physical donor |
-| `deleted` | BOOLEAN | Soft delete flag |
-| `y_haplogroup` | JSONB | Full HaplogroupResult with scoring |
-| `mt_haplogroup` | JSONB | Full HaplogroupResult with scoring |
-
----
-
-## Integration Roadmap
-
-### Phase 1 (Legacy): Direct REST API
-
-* **Mechanism:** Synchronous HTTP POST
-* **Flow:** `Edge App` → `CitizenBiosampleController` (`/api/external-biosamples`, `/api/projects`) → `CitizenBiosampleService` → `CitizenBiosampleEventHandler` → `DB`
-* **Status:** Functional for existing integrations. Use the new `/api/firehose/event` for all new Atmosphere Lexicon-based events.
-
-### Phase 2: Asynchronous Event Ingestion (Kafka)
-
-* **Mechanism:** Message Queue
-* **Flow:** `Edge App` → `Kafka Topic` → `DecodingUs Kafka Consumer` → `AtmosphereEventHandler` → `DB`
-* **Change:** Edge App uses Kafka Producer; DecodingUs adds Kafka Consumer service. Processes raw Atmosphere Lexicon events.
-* **Benefits:** Decoupled; handles traffic bursts; high resilience.
-
-### Phase 3: Decentralized AppView (AT Protocol Firehose)
-
-* **Mechanism:** AT Protocol Firehose subscription
-* **Flow:** `Edge App` → `User's PDS` → `AT Proto Relay` → `DecodingUs Firehose Consumer` → `AtmosphereEventHandler` → `DB`
-* **Change:** Edge App writes directly to PDS using `com.decodingus.atmosphere.*` Lexicon records; DecodingUs becomes a passive indexer.
-* **Benefits:** True user data ownership; interoperability with AT Protocol ecosystem.
-
----
-
-## Deployment Checklist
-
-### For New Atmosphere Integrations (using `/api/firehose/event`)
-
-1. **API Key:** Configure in AWS Secrets Manager (prod) or `application.conf` (dev)
-2. **Database:** Ensure all relevant evolutions (`25.sql` and prior) have been applied to update table schemas (e.g., `at_uri`, `at_cid` on `sequence_library`, new fields on `alignment_metadata`).
-3. **Edge App Config:** Set DecodingUs API URL and API key. Configure the Edge App to construct and send `FirehoseEvent` JSON payloads as per the Atmosphere Lexicon.
-4. **Test:** POST example `FirehoseEvent` payloads (e.g., `BiosampleEvent`, `SequenceRunEvent`, `AlignmentEvent`, `AtmosphereProjectEvent`) to `/api/firehose/event`.
-5. **Verify:** Check `citizen_biosample`, `specimen_donor`, `sequence_library`, `sequence_file`, `alignment_metadata`, and `project` tables for correctly ingested and linked data.
-
-### Swagger UI
-
-API documentation available at: `/api/docs`
-
-Documented endpoints now include:
-- **Generic Atmosphere Event Processor:** `POST /api/firehose/event`
-- Legacy Citizen Biosamples (Create, Update, Delete)
-- Legacy Projects (Create, Update, Delete)
-- References, Haplogroups, Coverage, Sequencer APIs
diff --git a/documents/Coverage_Priority_Report.md b/documents/Coverage_Priority_Report.md
deleted file mode 100644
index d9d1a1ef..00000000
--- a/documents/Coverage_Priority_Report.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# Code Coverage Analysis & Prioritization
-
-## Current Status
-* **Overall Statement Coverage:** ~5.72%
-* **Overall Branch Coverage:** ~3.44%
-* **Tested Areas:**
- * `HomeController` (Partial, rendering only)
- * `PgpBiosampleService` (Creation logic)
- * `AccessionNumberGenerator` (ID generation logic)
- * `AccessionNumberGenerator` (Infrastructure logic via decoupling)
-
-## Analysis of Gaps
-
-The application has significant gaps in its testing strategy. Most controllers and services are completely untested.
-
-### High Criticality (Core Business Logic)
-These areas handle data integrity, ingestion, and the core value proposition (Trees, Biosamples).
-
-1. **`BiosampleDataService.scala`**
- * **Role:** Orchestrates linking publications and adding raw sequence data to biosamples.
- * **Risk:** High. Failures here mean data loss or corruption during ingestion. Complex nested `Future` chains.
- * **Status:** 0% Coverage.
-
-2. **`HaplogroupTreeService.scala` & `TreeImporter.scala`**
- * **Role:** Manages the phylogenetic trees (Y-DNA/mtDNA).
- * **Risk:** High. The tree is the central data structure of the application.
- * **Status:** 0% Coverage.
-
-3. **`BiosampleUpdateService.scala`**
- * **Role:** Handling modifications to existing records.
- * **Risk:** Medium-High. Potential for unauthorized or incorrect data overwrites.
- * **Status:** 0% Coverage.
-
-### Medium Criticality (Controllers & Display)
-1. **`ExternalBiosampleController.scala`**
- * **Role:** Entry point for creating non-PGP biosamples.
- * **Risk:** Medium. Similar to `PgpBiosampleController` but less restrictive.
- * **Status:** 0% Coverage.
-
-2. **`BiosampleController.scala`**
- * **Role:** Retrieval and viewing of samples.
- * **Risk:** Low-Medium (Read-only mostly).
- * **Status:** 0% Coverage.
-
-## Prioritized Action Plan
-
-We recommend addressing coverage in the following order to maximize stability and reliability:
-
-| Priority | Component | Rationale |
-| :--- | :--- | :--- |
-| **1** | **`BiosampleDataService`** | Complex data orchestration (Library -> File -> Checksum -> Location) is prone to bugs. |
-| **2** | **`ExternalBiosampleService`** | Completes the coverage for "Ingestion" workflows (pairing with PGP service). |
-| **3** | **`BiosampleUpdateService`** | Ensures data modification safety. |
-| **4** | **`HaplogroupTreeService`** | Core domain logic, though often static/read-heavy. |
-| **5** | **`ExternalBiosampleController`** | API surface testing. |
-
-## Tech Debt Note
-* `BiosampleDataService` relies heavily on multiple repositories. Following the pattern used in `BiosampleAccessionGenerator`, we should strictly mock these repositories rather than trying to use an in-memory DB, to keep tests fast and focused on the orchestration logic.
diff --git a/documents/Database_Schema_Review.md b/documents/Database_Schema_Review.md
deleted file mode 100644
index 15907bf1..00000000
--- a/documents/Database_Schema_Review.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Database Schema Review: Alignment with Application Goals
-
-This document reviews the current database schema (`app/models/dal/`) against the application's stated goals of becoming an "App Layer in the Atmosphere" for citizen science genetic research.
-
-## Summary of Findings
-
-The database schema is remarkably mature and well-aligned with the project's complex domain requirements (Pangenome, Haplogroups, IBD). It already includes sophisticated structures for:
-* **Decentralized Identity:** Native support for DIDs in user and donor tables.
-* **Reputation:** A built-in system for tracking user contributions.
-* **Privacy-Preserving Discovery:** Specialized tables for IBD matches that verify PDS attestations without exposing raw data.
-
-However, specific gaps exist regarding the **operational management** of the Edge Node fleet and the **auditability** of specific data submissions.
-
-## Detailed Alignment Analysis
-
-### 1. Goal: Collaborative Haplogroup Tree Resolution
-* **Status:** **Strong Support**
-* **Evidence:** `HaplogroupsTable`, `HaplogroupRelationshipsTable`, and `HaplogroupVariantMetadataTable` provide a complete graph structure for storing the tree.
-* **Gap:** **Submission Provenance.** While the *result* (the tree) is stored, there is no obvious table (e.g., `HaplogroupCallSubmissions`) to track *which* Edge Node proposed a specific variant or branch change before it was accepted. Tracking this is crucial for the "Collaborative" aspect and resolving conflicts.
-
-### 2. Goal: Privacy-Preserving Genetic Relative Discovery (IBD)
-* **Status:** **Excellent Support**
-* **Evidence:**
- * `IbdDiscoveryIndicesTable`: Stores the *existence* of a match and its strength (cM) without storing the raw segment data, perfectly aligning with the privacy goal.
- * `IbdPdsAttestationsTable`: Links matches to `attesting_pds_guid` and includes an `attestation_signature`. This is a critical feature for a distributed trust model, allowing PDS owners to cryptographically sign off on matches.
-
-### 3. Goal: Edge Computing Participation (Citizen Science)
-* **Status:** **Mixed**
-* **Evidence (Positive):**
- * `UserPdsInfoTable`: Explicitly links Users to a `pds_url` and `did`.
- * `ReputationEventsTable` & `UserReputationScoresTable`: A complete gamification/credit system is already defined in the schema, allowing the system to reward users for contributions.
-* **Evidence (Negative):**
- * **Missing Operational State:** There is no table to track the *live status* of Edge Nodes. If a user has 5 computers running the Edge software, the database has no way to know which are Online, Offline, or their current load.
- * **Missing Device Registry:** `UserPdsInfo` links a *User* to a PDS. It does not clearly support a User having *multiple* distinct compute nodes (devices) with different capabilities.
-
-### 4. Goal: Secure Data Interaction (AT Protocol)
-* **Status:** **Good Support**
-* **Evidence:**
- * `SequenceAtpLocationTable`: Directly maps sequence files to AT Protocol concepts (`repo_did`, `record_cid`), enabling the "App Layer" to reference data stored in the distributed network.
- * `SpecimenDonorsTable`: Contains `citizen_biosample_did`, facilitating the link between physical samples and their digital twins on the AT Protocol.
-
-## Recommendations
-
-To fully support the "App Layer" vision, we recommend the following schema additions:
-
-1. **Edge Node Registry Table:**
- * Create `EdgeNodesTable` (or `UserComputeNodes`) to track individual devices associated with a user/PDS.
- * Columns: `node_id (UUID)`, `user_id`, `last_heartbeat (Timestamp)`, `status (Online/Offline/Busy)`, `software_version`.
-
-2. **Submission Audit Tables:**
- * Create `HaplogroupSubmissionsTable` to log incoming calls from Edge Nodes before they are merged into the main `BiosampleHaplogroupsTable`.
- * Columns: `submission_id`, `biosample_id`, `edge_node_id`, `proposed_haplogroup`, `confidence_score`, `algorithm_version`, `submission_timestamp`.
-
-3. **Job Assignment Table (Optional):**
- * If the server intends to *dispatch* work to Edge Nodes (rather than just accepting results), a `ComputeJobsTable` will be needed to track which node was assigned which task.
-
-## Conclusion
-
-The schema requires only minor additions to support the operational aspects of the "Edge Node" fleet. The core scientific and identity data models are robust and ready for production.
diff --git a/documents/Internationalization_Guide.md b/documents/Internationalization_Guide.md
deleted file mode 100644
index 44a7dd49..00000000
--- a/documents/Internationalization_Guide.md
+++ /dev/null
@@ -1,116 +0,0 @@
-# Internationalization (I18n) Guide for Decoding Us
-
-## Overview
-Currently, the application has English text embedded directly into Twirl templates. To support multiple languages, we should adopt the **Standard Play Framework I18n Pattern**. This approach is robust, performant, and natively supported by the framework without requiring external libraries.
-
-## The Design Pattern
-
-The core concept is to separate **Content** (text) from **Structure** (HTML/Twirl).
-
-### 1. Architecture
-* **Message Files:** Text is stored in `conf/messages` (default/English), `conf/messages.fr` (French), `conf/messages.es` (Spanish), etc.
-* **Key-Value Pairs:** Each line in these files maps a unique key to a translated string.
- * `home.title = Welcome to Decoding Us`
-* **Twirl Templates:** Instead of hardcoded text, templates use the `Messages` object to look up strings by key.
- * `<h1>@messages("home.title")</h1>`
-* **Context Propagation:** Controllers inject `MessagesControllerComponents` and mix in `I18nSupport` to automatically detect the user's preferred language (via `Accept-Language` header or cookies) and pass the correct `Messages` provider to the view.
-
-## Implementation Steps
-
-### Step 1: Create Message Files
-Create the `conf/messages` file for the default language (English).
-
-**File:** `conf/messages`
-```properties
-# General
-app.name = Decoding Us
-site.title = Decoding Us - Citizen Science Genetics
-
-# Navigation
-nav.home = Home
-nav.about = About
-nav.contact = Contact
-
-# Home Page
-home.welcome = Welcome to Decoding Us
-home.intro = Decoding Us will be a next-generation platform for citizen science...
-home.goals.title = The system shall be architected with these goals:
-```
-
-### Step 2: Update Configuration
-Enable the languages in `conf/application.conf`.
-
-```hocon
-play.i18n {
- # The list of supported languages
- langs = [ "en", "fr", "es" ]
-}
-```
-
-### Step 3: Refactor Controllers
-Update controllers to provide `Messages` support. This is often done by injecting `MessagesControllerComponents`.
-
-**Example:**
-```scala
-import play.api.mvc._
-import play.api.i18n._
-import javax.inject.Inject
-
-class HomeController @Inject()(cc: MessagesControllerComponents) extends AbstractController(cc) with I18nSupport {
- def index = Action { implicit request =>
- // 'request' implicitly contains the messages context due to I18nSupport
- Ok(views.html.index())
- }
-}
-```
-
-### Step 4: Refactor Views
-Update Twirl templates to accept an implicit `Messages` provider and use it.
-
-**File:** `app/views/index.scala.html`
-```scala
-@()(implicit messages: Messages)
-
-@main(messages("site.title")) {
-
- @messages("home.welcome")
- @messages("home.intro")
-
-}
-```
-
-**File:** `app/views/main.scala.html` (Layout)
-```scala
-@(title: String)(content: Html)(implicit messages: Messages)
-
-
-
-
-
@title
-
-
-
- @_navbar()
- @content
-
-
-```
-
-## Handling Dynamic Content
-For text that includes dynamic values (e.g., "Hello, John"), use placeholders in the message file.
-
-`conf/messages`:
-```properties
-greeting = Hello, {0}!
-```
-
-Twirl:
-```scala
-@messages("greeting", userName)
-```
-
-## Advantages
-1. **Standardization:** Any Play developer will instantly understand this structure.
-2. **Performance:** Message lookups are extremely fast and compiled.
-3. **Type Safety:** While the keys are strings, the integration with Twirl is robust.
-4. **Flexibility:** Adding a new language just requires adding a new `messages.xx` file.
diff --git a/documents/Internationalization_Guide_Blocks.md b/documents/Internationalization_Guide_Blocks.md
deleted file mode 100644
index 6d4a4fc8..00000000
--- a/documents/Internationalization_Guide_Blocks.md
+++ /dev/null
@@ -1,88 +0,0 @@
-# Internationalization (I18n) Guide: Block-Based Content Strategy
-
-## Overview
-While the standard key-value pair approach (Property Files) is excellent for UI labels and short text, it is cumbersome and unmaintainable for long-form content like "About Us" pages, blog posts, or extensive privacy policies.
-
-For these cases, we recommend a **Block-Based Content Strategy** that treats long-form content as structural dependencies rather than simple strings.
-
-## Recommended Pattern: Localized Partial Views
-
-Instead of putting entire paragraphs into a `messages` file, we create separate Twirl templates (partials) for the content blocks of each language.
-
-### 1. Architecture
-
-* **Structure:** Maintain the main page structure (layout, headers, footers) in a master template.
-* **Content Blocks:** Create a directory structure for localized content fragments.
- * `app/views/content/en/aboutBody.scala.html`
- * `app/views/content/es/aboutBody.scala.html`
-* **Dispatcher:** Use a helper (or the controller) to dynamically select the correct partial based on the user's language.
-
-### 2. Implementation
-
-#### File Structure
-```
-app/
- views/
- about.scala.html (Master structure)
- content/
- en/
- aboutText.scala.html (English paragraphs)
- es/
- aboutText.scala.html (Spanish paragraphs)
-```
-
-#### The Content Partial (English)
-**`app/views/content/en/aboutText.scala.html`**
-```html
-
Decoding Us will be a next-generation platform for citizen science focused on empowering individuals...
-
The system shall be architected with these goals:
-
-```
-
-#### The Master View (Dispatcher)
-**`app/views/about.scala.html`**
-```scala
-@()(implicit messages: Messages)
-
-@main(messages("nav.about")) {
-
-
@messages("nav.about")
-
- @messages.lang.code match {
- case "es" => { @views.html.content.es.aboutText() }
- case "fr" => { @views.html.content.fr.aboutText() }
- case _ => { @views.html.content.en.aboutText() }
- }
-
-}
-```
-
-### 3. Alternative: Markdown-Based Content
-
-For even easier editing (especially for non-developers), you can store long-form content as **Markdown** files and render them at runtime.
-
-* **Storage:** `conf/content/about/en.md`, `conf/content/about/es.md`.
-* **Loader:** A simple service reads the file based on the requested language.
-* **Renderer:** Use a library like `flexmark-java` to convert Markdown to HTML in the controller, then pass the `Html` object to the view.
-
-**Controller Example:**
-```scala
-def about = Action { implicit request =>
- val lang = messagesApi.preferred(request).lang.code
- val markdownContent = contentLoader.load("about", lang) // returns "## About Us..."
- val htmlContent = MarkdownRenderer.render(markdownContent)
- Ok(views.html.about(htmlContent))
-}
-```
-
-## Summary Recommendation
-
-| Use Case | Recommended Pattern |
-| :--- | :--- |
-| **UI Labels, Buttons, Short Titles** | **Standard `messages` file** (Key-Value). |
-| **Static Long-Form (About, Terms)** | **Localized Partial Views** (Twirl). Best for compile-time safety. |
-| **Dynamic/Frequent Long-Form (Blog)** | **Markdown Files**. Best for ease of editing and CMS-like behavior. |
-
-For the MVP "About" page, **Localized Partial Views** offers the best balance of type safety and maintainability without introducing new dependencies.
diff --git a/documents/atmosphere/02-Core-Records.md b/documents/atmosphere/02-Core-Records.md
index 4903b7d8..98a9f408 100644
--- a/documents/atmosphere/02-Core-Records.md
+++ b/documents/atmosphere/02-Core-Records.md
@@ -365,6 +365,87 @@ This record defines a research project that aggregates multiple biosamples withi
---
+## 6. Instrument Observation Record
+
+This record is a citizen's explicit claim that a sequencer instrument id (from `@RG`
+headers) belongs to a particular laboratory, carrying a confidence level. The AppView
+mirrors these into `fed.instrument_observation` and the **consensus engine**
+(`du_db::sequencer::recompute_consensus`) folds them — weighted by `confidence` and
+recency — alongside the implicit `centerName` claims on biosamples, producing
+curator proposals that, when accepted, set the instrument→lab tie the public
+`/api/v1/sequencer/lab` lookup resolves. This is the only citizen-driven input to
+lab inference; everything else is read-only lookup. See
+`documents/planning/sequencer-lab-inference-system.md`.
+
+**NSID:** `com.decodingus.atmosphere.instrumentObservation`
+
+```json
+{
+ "lexicon": 1,
+ "id": "com.decodingus.atmosphere.instrumentObservation",
+ "defs": {
+ "main": {
+ "type": "record",
+ "description": "An observation of a sequencer instrument and its associated laboratory, extracted from BAM/CRAM read headers.",
+ "key": "tid",
+ "record": {
+ "type": "object",
+ "required": ["instrumentId", "labName", "biosampleRef", "observedAt"],
+ "properties": {
+ "instrumentId": {
+ "type": "string",
+ "description": "The instrument ID extracted from the @RG header (e.g., 'A00123').",
+ "minLength": 1,
+ "maxLength": 255
+ },
+ "labName": {
+ "type": "string",
+ "description": "The name of the sequencing laboratory (as known by the user or inferred).",
+ "minLength": 1,
+ "maxLength": 255
+ },
+ "biosampleRef": {
+ "type": "string",
+ "description": "AT URI of the biosample this observation was extracted from."
+ },
+ "platform": {
+ "type": "string",
+ "description": "Sequencing platform.",
+ "knownValues": ["ILLUMINA", "PACBIO", "ONT", "MGI", "ELEMENT", "ULTIMA"]
+ },
+ "instrumentModel": {
+ "type": "string",
+ "description": "Inferred or known instrument model (e.g., 'NovaSeq 6000')."
+ },
+ "flowcellId": {
+ "type": "string",
+ "description": "Flowcell identifier if extractable from read headers."
+ },
+ "runDate": {
+ "type": "string",
+ "format": "datetime",
+ "description": "Date of the sequencing run if extractable."
+ },
+ "confidence": {
+ "type": "string",
+ "description": "Confidence level of the lab association (weights: KNOWN 1.0, INFERRED 0.7, GUESSED 0.3).",
+ "knownValues": ["KNOWN", "INFERRED", "GUESSED"],
+ "default": "INFERRED"
+ },
+ "observedAt": {
+ "type": "string",
+ "format": "datetime",
+ "description": "When this observation was recorded (drives the recency term of the consensus score)."
+ }
+ }
+ }
+ }
+ }
+}
+```
+
+---
+
## Mapping to `decodingus` Backend
To fully leverage these records, `decodingus` will evolve its internal data model:
diff --git a/documents/atmosphere/07-Discovery-Records.md b/documents/atmosphere/07-Discovery-Records.md
index 23a51c2e..8433f657 100644
--- a/documents/atmosphere/07-Discovery-Records.md
+++ b/documents/atmosphere/07-Discovery-Records.md
@@ -104,8 +104,81 @@ This record allows citizens to contribute instrument-lab observations from their
---
+## Private Variant Record
+
+This record lets a citizen publish the **private variants** their analysis found beyond
+their assigned terminal haplogroup — the mutations that may define a new branch. The
+DecodingUs AppView mirrors them into `fed.private_variant` and the **discovery consensus
+engine** (`du_db::discovery`) pools them across submitters by variant-set similarity
+(Jaccard) into proposed branches for curator review. One record per (biosample, DNA arm — i.e. `dnaType`).
+
+**Privacy:** like the `biosample`/`strProfile` summary records, this is citizen-opt-in,
+keyed by biosample ref (no donor PII); variants are anonymized to coordinates/known names.
+
+**NSID:** `com.decodingus.atmosphere.privateVariant`
+
+```json
+{
+ "lexicon": 1,
+ "id": "com.decodingus.atmosphere.privateVariant",
+ "defs": {
+ "main": {
+ "type": "record",
+ "description": "The private variants a sample carries beyond its assigned terminal haplogroup — candidate defining mutations for a new branch.",
+ "key": "tid",
+ "record": {
+ "type": "object",
+ "required": ["meta", "biosampleRef", "dnaType", "terminalHaplogroup", "variants"],
+ "properties": {
+ "meta": { "type": "ref", "ref": "com.decodingus.atmosphere.defs#recordMeta" },
+ "biosampleRef": {
+ "type": "string",
+ "description": "AT URI of the biosample these private variants were extracted from."
+ },
+ "sequenceRunRef": {
+ "type": "string",
+ "description": "AT URI of the specific sequence run (optional, for precision)."
+ },
+ "dnaType": {
+ "type": "string",
+ "description": "Which tree the variants extend.",
+ "knownValues": ["Y_DNA", "MT_DNA"]
+ },
+ "terminalHaplogroup": {
+ "type": "string",
+ "description": "The terminal haplogroup the sample was assigned (e.g., 'R-M269'); the private variants sit below it."
+ },
+ "variants": {
+ "type": "array",
+ "description": "The private (mismatching) variant calls beyond the terminal.",
+ "items": {
+ "type": "object",
+ "required": ["contig", "position", "ancestral", "derived"],
+ "properties": {
+ "name": { "type": "string", "description": "Known name if any (e.g., 'FT123456'); omit for novel variants." },
+ "contig": { "type": "string", "description": "Reference contig (e.g., 'chrY')." },
+ "position": { "type": "integer", "description": "GRCh38 position." },
+ "ancestral": { "type": "string", "description": "Ancestral allele." },
+ "derived": { "type": "string", "description": "Derived allele." },
+ "rsId": { "type": "string", "description": "dbSNP rsID if known." }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+```
+
+---
+
## Backend Mapping
* **`InstrumentObservation`:** Maps to `instrument_observation` table for lab inference consensus.
+* **`PrivateVariant`:** Mirrored to `fed.private_variant`; the discovery consensus engine
+ (`du_db::discovery`) materializes it into `tree.biosample_private_variant` and pools it
+ into `tree.proposed_branch`. See
+ [haplogroup-discovery-system.md](../haplogroup-discovery-system.md) (D6).
See [sequencer-lab-inference-system.md](../sequencer-lab-inference-system.md) for implementation planning.
diff --git a/documents/curator-guide-tree-versioning.md b/documents/curator-guide-tree-versioning.md
index f331a5e7..dab2f95f 100644
--- a/documents/curator-guide-tree-versioning.md
+++ b/documents/curator-guide-tree-versioning.md
@@ -1,503 +1,260 @@
# Curator Guide: Tree Versioning System
-This guide explains how to use the Tree Versioning System to review, validate, and apply bulk changes to the haplogroup tree.
+This guide explains how to review, validate, and apply bulk changes to the
+haplogroup tree, and how to resolve the placements the merge/graft couldn't make
+confidently.
+
+The Rust AppView splits this across **two curator screens**:
+
+- **Change Sets** (`/curator/change-sets`) — the lifecycle: review the diff,
+ approve/reject changes, then apply or discard.
+- **Merge Reviews** (`/curator/reviews`) — resolve the ambiguous or blocked
+ placements the SNP-graft/merge staged for a human (the `wip_*` items).
+
+Both are reached from the **Curator dashboard** (`/curator`) and are gated by the
+**Curator** role (`Admin`, `TreeCurator`, or `Curator`) — there are no finer-grained
+per-action permissions.
---
## Overview
-When large external tree sources (like ISOGG or ytree.net) are merged into the system, the changes don't go directly to production. Instead, they're captured in a **Change Set** that you can review before applying.
+When large external tree sources (ISOGG, decoding-us, FTDNA) are merged or
+grafted in, the changes don't go straight to production. They're captured in a
+**Change Set** you review before applying.
This gives you:
- Time to review changes at your own pace
-- Ability to see what will change before it affects users
-- Tools to handle ambiguous placements
+- A diff of exactly what will change before it affects users
+- A separate worklist for ambiguous placements that need a human decision
- An audit trail of all changes
---
-## Accessing the Change Sets Dashboard
+## Change Sets screen (`/curator/change-sets`)
-1. Navigate to **Curator > Change Sets** from the main menu
-2. Or go directly to `/curator/change-sets`
-
-**Required Permission:** `tree.version.view`
-
----
+A two-panel (master-detail) HTMX screen.
-## Understanding the Dashboard
+### Left panel — the change-set list
-The dashboard shows a master-detail layout:
+Each change set shows its name, source, DNA type (Y/mt), status, change count,
+and who created it. Filter by status with the dropdown.
-### Left Panel: Change Set List
+### Right panel — the detail/diff
-Each change set shows:
-- **Name**: Descriptive name (e.g., "isogg-2025-12")
-- **Source**: Where the data came from (ISOGG, ytree.net, etc.)
-- **Type**: Y-DNA or mtDNA
-- **Status**: Current state (see below)
-- **Changes**: Total count and pending items
-- **Created**: When and by whom
+Selecting a change set loads its panel (`/curator/change-sets/:id/panel`) with:
+- Summary stats: **added / removed / modified / reparented**
+- The **diff** rows (type, node name, before→after detail) — rendered inline
+- The per-change list with each change's status
+- Comments, and the status-appropriate lifecycle actions
-### Right Panel: Change Set Details
+### Statuses
-Click a change set to see:
-- Full statistics (nodes created, updated, reparented)
-- Ambiguity warnings
-- Available actions based on status
-- Comments from other curators
-
----
-
-## Change Set Statuses
-
-| Status | Meaning | Your Action |
+| Status | Meaning | Your action |
|--------|---------|-------------|
-| **Draft** | Merge in progress | Wait for completion |
-| **Ready for Review** | Merge complete, needs review | Start review |
-| **Under Review** | Being actively reviewed | Continue reviewing |
-| **Applied** | Changes are in production | None (read-only) |
-| **Discarded** | Changes were abandoned | None (read-only) |
-
----
-
-## Filtering Change Sets
-
-Use the dropdown filters at the top:
-- **Type Filter**: Show only Y-DNA or mtDNA change sets
-- **Status Filter**: Show only sets in a specific status
-
----
-
-## Reviewing a Change Set
-
-### Step 1: Start the Review
-
-1. Click a change set with "Ready for Review" status
-2. Click **Start Review** in the detail panel
-3. Status changes to "Under Review"
-
-### Step 2: View the Full Diff
-
-Click **View Full Diff** to see all changes in the set:
-
-- **Green rows**: New nodes being created
-- **Yellow rows**: Existing nodes being updated
-- **Blue rows**: Nodes being reparented (moved in tree)
-
-The diff view shows:
-- What changed (name, variants, parent)
-- Before and after values for updates
-- Confidence scores for ambiguous placements
-
-### Step 2b: View Tree Preview (ASCII)
-
-For a structural overview of proposed changes, click the **Tree Preview** button in the change set detail panel (next to "View Diff"). The preview opens in a new browser tab.
-
-> **Direct URL:** You can also access it at `/curator/change-sets/{id}/tree-preview` if needed.
-
-This returns a plain-text ASCII tree showing affected subtrees with markers:
-
-```
-=== Tree Preview: Y merge from ISOGG ===
-Type: Y | Status: DRAFT
-New nodes: 8322 | Reparents: 306 | Variant additions: 76673
-
-Legend: [+] = new node, [→] = reparented, [~] = modified
-==================================================
-
-Y
-├── [+] A00-T (V60, V168, +3 more)
-│ ├── [→] A00
-│ └── [+] A0-T
-│ ├── [→] A0
-│ └── [→] A1
-└── BT
-
---- Nodes reparented to new WIP nodes ---
- A00: Y → A00-T [+]
- A0: Y → A0-T [+]
-
---- Variant additions to existing nodes ---
- E1a-Y947: (M4671, CTS9320, +38 more)
-```
-
-**Legend:**
-| Marker | Meaning |
-|--------|---------|
-| `[+]` | New node to be created |
-| `[→]` | Existing node being reparented |
-| `[~]` | Existing node with variant additions |
-
-The preview shows:
-- New nodes in their proposed tree position
-- Existing siblings for context
-- Up to 5 variant names per node
-- Summary of reparent operations
-- Summary of variant additions to existing nodes
-
-#### Tips for Navigating Large Previews
-
-For large merges with thousands of nodes, the ASCII preview can be dense. Here are some practical tips:
-
-1. **Use browser search (Ctrl+F / Cmd+F)** - Search for specific haplogroup names or variant names to jump directly to areas of interest
-
-2. **Focus on the summary sections** - Scroll to the bottom for:
- - "Nodes reparented to new WIP nodes" — shows all reparent operations in a compact list
- - "Variant additions to existing nodes" — lists all existing nodes receiving new variants
-
-3. **Look for markers first** - Search for `[+]` to find all new nodes, or `[→]` to find all reparented nodes
-
-4. **Copy to a text editor** - For very large previews, copy the text to an editor with better navigation (code folding, outline view, etc.)
-
-5. **Cross-reference with the Diff view** - Use the Tree Preview to understand structure, then switch to the Diff view for detailed change-by-change review
-
-> **Future Enhancement:** A graphical side-by-side tree comparison view is planned for a future release, which will provide a more visual way to review structural changes.
-
-### Step 3: Handle Ambiguities
-
-If the change set has ambiguities:
+| **DRAFT** | Merge/graft still materializing | Wait, then Start Review |
+| **READY_FOR_REVIEW** | Materialized, awaiting review | Start Review (or Apply) |
+| **UNDER_REVIEW** | Being actively reviewed | Approve/reject changes, then Apply |
+| **APPLIED** | Live in production | None (read-only) |
+| **DISCARDED** | Abandoned | None (read-only) |
-1. Look for the yellow warning banner showing the count
-2. Click **View Report** to see the ambiguity report
-3. For each ambiguity, decide:
- - **Accept the placement** (approve the change)
- - **Reject the placement** (skip this change)
- - **Manually fix** the data before applying
+Per-change status runs `PENDING → APPROVED → APPLIED`, or `PENDING → REJECTED` (rejected changes are never applied).
-Ambiguities occur when:
-- Multiple possible parent placements exist
-- The algorithm chose based on heuristics
-- A confidence score below threshold was assigned
+### Lifecycle actions
-#### Understanding Confidence Scores
+| Action | Endpoint | Available when |
+|--------|----------|----------------|
+| **Start Review** | `POST /curator/change-sets/:id/start-review` | DRAFT or READY_FOR_REVIEW → UNDER_REVIEW |
+| **Review one change** (approve/reject) | `POST /curator/change-sets/:id/changes/:change_id/review` | UNDER_REVIEW |
+| **Approve All Pending** | `POST /curator/change-sets/:id/approve-all` | UNDER_REVIEW |
+| **Apply** | `POST /curator/change-sets/:id/apply` | READY_FOR_REVIEW or UNDER_REVIEW |
+| **Discard** | `POST /curator/change-sets/:id/discard` | any non-APPLIED state |
+| **Comment** | `POST /curator/change-sets/:id/comments` | any |
-Each ambiguous placement includes a confidence score from 0.0 to 1.0. Use this guide to prioritize your review:
-
-| Score Range | Risk Level | Recommended Action |
-|-------------|------------|-------------------|
-| **0.80 – 1.00** | Low | Generally safe to approve. Algorithm had strong SNP overlap. Spot-check a few. |
-| **0.50 – 0.79** | Medium | Review the placement. Check if the shared SNPs make sense for this branch. |
-| **0.20 – 0.49** | High | Manual verification required. Compare source data against known phylogeny. |
-| **0.00 – 0.19** | Very High | Likely incorrect placement. Consider skipping or manually researching. |
-
-**What affects confidence:**
-- **SNP overlap** — More shared defining variants = higher confidence
-- **Conflicting variants** — Variants that contradict the placement lower confidence
-- **Tree depth** — Deeper placements with fewer distinguishing SNPs may have lower scores
-- **Source quality** — Some sources have more complete variant data than others
-
-### Step 4: Review Individual Changes
-
-In the "Under Review" status, you'll see pending changes:
-
-For each change you can:
-- **Approve**: Mark as validated
-- **Skip**: Exclude from this promotion (stays in set but won't apply)
-
-Use **Approve All Pending** to quickly approve remaining changes after you've reviewed the ambiguities.
+**Apply** promotes only the **APPROVED** changes to the live (temporal) tree and
+marks the set APPLIED; it's idempotent (re-applying an APPLIED set is a no-op).
+**Discard** requires a reason.
---
-## Reviewing Large Change Sets
+## Reviewing a change set
-When dealing with thousands of changes (common for major source updates like ISOGG), a systematic approach is essential.
+1. **Start the review** — select a READY_FOR_REVIEW set and click Start Review
+ (status → UNDER_REVIEW).
+2. **Read the diff** in the detail panel:
+ - added (new nodes), removed, modified (e.g. variant edits), reparented (moved).
+3. **Resolve any flagged placements** in the **Merge Reviews** screen (below) — the
+ merge/graft routes anything it couldn't place confidently there, rather than
+ guessing.
+4. **Approve individual changes**, or **Approve All Pending** once you've vetted the
+ flagged items.
+5. **Apply** when satisfied.
-### Current Workflow
+### Confidence scores
-1. **Triage by confidence** — Start with the Ambiguity Report, sorted by lowest confidence first
-2. **Use Tree Preview** — Get a structural overview before diving into details
-3. **Spot-check by branch** — Use browser search (Ctrl+F) in the Tree Preview or Diff view to find specific clades
-4. **Approve in bulk** — After reviewing ambiguities, use "Approve All Pending" for remaining items
+Each flagged placement's confidence score is its anchor strength (0–100%), shown in Merge Reviews. Prioritize accordingly:
-### Current Limitations
+| Strength | Risk | Recommended action |
+|----------|------|--------------------|
+| **80–100%** | Low | Generally safe; spot-check a few |
+| **50–79%** | Medium | Check that the shared SNPs make sense for the branch |
+| **20–49%** | High | Manual verification against known phylogeny |
+| **0–19%** | Very high | Likely wrong; defer or research before accepting |
-The following features are not yet available but are on the roadmap:
-
-| Desired Feature | Current Workaround |
-|-----------------|-------------------|
-| Filter diff by branch (e.g., "only R1b") | Use Ctrl+F in Tree Preview or Diff view |
-| Bulk approve by subclade | Review ambiguities, then "Approve All Pending" |
-| Assign branches to expert curators | Coordinate manually; add comments noting who reviewed what |
-| Export diff to spreadsheet | Copy Tree Preview text to external tools |
-
-### Recommended Review Strategy for 5,000+ Changes
-
-1. **Don't review every change** — Focus on ambiguities and structural changes (reparents)
-2. **Trust high-confidence placements** — Scores above 0.80 rarely need individual review
-3. **Divide by expertise** — If multiple curators are available, coordinate by major branch:
- - "I'll review everything under R1b"
- - "You take the E-M96 subtree"
-4. **Use comments** — Add comments to the change set noting what you reviewed
-5. **Time-box your review** — Set a limit (e.g., 2 hours) then assess if more review is needed
-
-> **Feature Requests:** If you need filtering by branch or bulk approval by subclade, please submit a feature request. These are high-priority UX improvements under consideration.
+What affects it: SNP overlap (more shared defining variants = stronger), conflicting
+variants, tree depth, and source completeness.
---
-## Conflict Resolution
-
-Beyond simply approving or skipping changes, you can now create **resolutions** to correct merge algorithm decisions before applying to production.
+## Merge Reviews screen (`/curator/reviews`)
-### Resolution Types
+This is where you resolve the items the SNP-graft/merge staged for a human — the
+`tree.wip_*` rows: SNP-graft Phase-4 flags, name collisions, and graft-blocked
+branches.
-| Type | Description | Use Case |
-|------|-------------|----------|
-| **REPARENT** | Change the parent of a node | Algorithm placed node under wrong parent |
-| **EDIT_VARIANTS** | Add or remove variant associations | Missing or incorrect SNP assignments |
-| **MERGE_EXISTING** | Map WIP node to existing production node | Duplicate detection — don't create, link instead |
-| **DEFER** | Move to manual review queue | Needs expert review or more research |
+### Left panel — the worklist
-### Creating Resolutions
+The staged items, filterable by **status** and **category**. Each row shows the
+source, node name, category, best anchor, and any resolution already chosen.
-Resolutions are created via the API. Each resolution targets either a WIP haplogroup (new node) or a WIP reparent (move operation).
+### Right panel — one item's context + resolution form
-#### REPARENT Resolution
+Selecting an item loads its panel (`/curator/reviews/:wip_id/panel`) with the full
+decision context:
+- The reason it was flagged and its **category**
+- **Best anchor** + **anchor strength %**, and the candidate anchor nodes (with hit
+ counts)
+- Defining-SNP counts (and how many are known to the foundation)
+- The source parent and its status; whether it's backbone
+- The **tentative parent** and a preview of where it would land (that parent's
+ current children)
+- Open / resolved / deferred counts for the parent change set
-When the algorithm placed a node under the wrong parent:
+### Resolving an item
-```bash
-# Via curl (example)
-curl -X POST /curator/change-sets/123/resolve/reparent \
- -d "wipHaplogroupId=456" \
- -d "newParentId=789" \
- -d "notes=Source data shows this should be under R-M269"
-```
+`POST /curator/reviews/:wip_id/resolve` with an `action`, an optional `target`
+(a **node name**, resolved server-side — not a numeric id), and `notes`:
-Parameters:
-- `wipHaplogroupId` or `wipReparentId` — What to resolve (one required)
-- `newParentId` or `newParentPlaceholderId` — New parent (one required)
-- `notes` — Explanation for audit trail
+| Action | `target` | Use case |
+|--------|----------|----------|
+| **REPARENT** | new parent's name (**required**) | Confirm the suggested anchor, or choose a different parent |
+| **MERGE_EXISTING** | existing node's name (**required**) | The staged node duplicates a production node — link instead of creating |
+| **DEFER** | — | Needs more research; excluded from Apply until resolved |
-#### EDIT_VARIANTS Resolution
+An unknown `target` name is rejected with a notice (no node is created from a typo).
+Decisions are written to `wip_resolution` and attributed to you.
-When variant associations need correction:
+### Applying resolutions
-```bash
-curl -X POST /curator/change-sets/123/resolve/edit-variants \
- -d "wipHaplogroupId=456" \
- -d 'variantsToAdd=[101, 102, 103]' \
- -d 'variantsToRemove=[50]' \
- -d "notes=Adding missing defining SNPs per ISOGG"
-```
+`POST /curator/reviews/:wip_id/apply` bumps the parent change set to UNDER_REVIEW
+and runs the **same tested change-set apply engine**, enacting your resolutions. It
+reports created / variant-edits / skipped counts. Deferred items are skipped and
+remain in the worklist.
-Parameters:
-- `wipHaplogroupId` or `wipReparentId` — What to resolve
-- `variantsToAdd` — JSON array of variant IDs to add
-- `variantsToRemove` — JSON array of variant IDs to remove
-- `notes` — Explanation
-
-#### MERGE_EXISTING Resolution
-
-When a WIP node duplicates an existing production node:
-
-```bash
-curl -X POST /curator/change-sets/123/resolve/merge-existing \
- -d "wipHaplogroupId=456" \
- -d "mergeTargetId=200" \
- -d "notes=R-M269 already exists as ID 200"
-```
-
-This prevents creating a duplicate — the WIP node's relationships will be redirected to the existing production node.
-
-#### DEFER Resolution
-
-When an item needs expert review before deciding:
+---
-```bash
-curl -X POST /curator/change-sets/123/resolve/defer \
- -d "wipHaplogroupId=456" \
- -d "priority=HIGH" \
- -d "reason=Disputed placement - needs phylogeny expert review" \
- -d "notes=See ISOGG discussion thread #4521"
-```
+## Reviewing large change sets
-Priority levels: `LOW`, `NORMAL`, `HIGH`, `CRITICAL`
+Major source updates produce thousands of changes. A systematic pass:
-Deferred items are excluded from Apply until resolved.
+1. **Triage by anchor strength** — handle the lowest-confidence flagged items first
+ in Merge Reviews.
+2. **Read the diff** for structural changes (reparents) before bulk-approving.
+3. **Search** — use the browser find (Ctrl/Cmd-F) to jump to specific clades in the
+ diff.
+4. **Approve in bulk** — after resolving the flagged items, use **Approve All
+ Pending** for the rest.
-### Viewing Resolutions
+Recommended strategy for 5,000+ changes:
+- Don't review every change — focus on flagged items and reparents.
+- Trust high-strength placements (≥80%).
+- Divide by expertise across curators (e.g. "I'll take R1b, you take E-M96"); use
+ change-set **comments** to note who reviewed what.
+- Time-box the review, then reassess.
-**All resolutions for a change set:**
-```
-GET /curator/change-sets/123/resolutions
-```
+---
-**Deferred items only:**
-```
-GET /curator/change-sets/123/deferred
-```
+## Applying to production
-### Cancelling a Resolution
+When you're satisfied:
+1. Ensure flagged items are resolved (or deferred) in Merge Reviews.
+2. Click **Apply** on the change set.
+3. Confirm.
-If you created a resolution by mistake:
+The approved changes are applied to the live tree, the status becomes **APPLIED**,
+and an audit record (`promoted_by`/`promoted_at`) is written.
-```bash
-curl -X DELETE /curator/change-sets/123/resolutions/999
-```
+## Discarding a change set
-This sets the resolution status to `CANCELLED`, effectively removing it.
+If the changes shouldn't be applied, click **Discard** and enter a reason. Common
+reasons: data-quality issues, superseded by a newer merge, or a test merge.
-### Resolution Workflow
+---
-1. **During review**, identify problematic placements via the Ambiguity Report or Tree Preview
-2. **Create resolutions** for items needing correction
-3. **View resolutions** to verify all corrections are in place
-4. **Apply** — the system applies your resolutions during promotion:
- - REPARENT: Uses your specified parent instead of the original
- - EDIT_VARIANTS: Adds/removes variants after node creation
- - MERGE_EXISTING: Skips node creation, remaps relationships
- - DEFER: Skips the item entirely (remains in WIP)
-5. **Resolution status** is updated to `APPLIED` after successful processing
+## Machine / scripted access (management API)
-### API Summary
+The interactive screens above are for curators. A separate **management API**
+(under `/manage/*`, authenticated via `X-API-Key`) drives the same lifecycle for automation — e.g. the
+tree-init/graft tooling:
| Endpoint | Method | Description |
|----------|--------|-------------|
-| `/curator/change-sets/:id/resolutions` | GET | List all resolutions |
-| `/curator/change-sets/:id/deferred` | GET | List deferred items |
-| `/curator/change-sets/:id/resolve/reparent` | POST | Create REPARENT resolution |
-| `/curator/change-sets/:id/resolve/edit-variants` | POST | Create EDIT_VARIANTS resolution |
-| `/curator/change-sets/:id/resolve/merge-existing` | POST | Create MERGE_EXISTING resolution |
-| `/curator/change-sets/:id/resolve/defer` | POST | Create DEFER resolution |
-| `/curator/change-sets/:csId/resolutions/:rId` | DELETE | Cancel a resolution |
-
-**Required Permission:** `tree.version.review`
+| `/manage/haplogroups/merge` · `/merge/preview` | POST | Run / preview a merge |
+| `/manage/change-sets` | GET / POST | List / create change sets |
+| `/manage/change-sets/:id` | GET | Change-set detail |
+| `/manage/change-sets/:id/changes` | POST | Add a change |
+| `/manage/change-sets/:id/diff` | GET | Full diff (JSON) |
+| `/manage/change-sets/:id/{start-review,approve-all,apply,discard}` | POST | Lifecycle |
+| `/manage/change-sets/:id/changes/:change_id/review` | POST | Review one change |
---
-## Applying to Production
-
-When you're satisfied with the review:
+## Best practices
-1. Ensure all high-priority items have been reviewed
-2. Click **Apply to Production**
-3. Confirm in the dialog
-
-**What happens:**
-- All approved changes are applied to the live tree
-- Users will see the updated tree structure
-- Change set status becomes "Applied"
-- An audit record is created
-
-**Required Permission:** `tree.version.promote`
+- **Before reviewing:** check the source (is it a trusted authority?) and the scale (larger change sets need a more careful review).
+- **During review:** start with the lowest-confidence flagged items; read the diff
+ for unexpected reparents.
+- **Before applying:** have another curator spot-check large sets; apply outside
+ peak usage.
---
-## Discarding a Change Set
-
-If the changes should not be applied:
+## Workflow example
-1. Click **Discard** (red button)
-2. Enter a reason (required, minimum 10 characters)
-3. Click **Confirm Discard**
+**Scenario:** an ISOGG update with thousands of new nodes.
-**Common reasons to discard:**
-- Data quality issues discovered
-- Superseded by a newer merge
-- Test merge that was never intended for production
-
-**Required Permission:** `tree.version.discard`
-
----
-
-## Best Practices
-
-### Before Reviewing
-
-- Check when the change set was created
-- Review the source - is this a trusted authority?
-- Note the scale - more changes = more careful review needed
-
-### During Review
-
-- Start with the ambiguity report
-- Focus on low-confidence placements first
-- Use the diff view to understand structural changes
-- Look for unexpected reparenting operations
-
-### Before Applying
-
-- Have another curator spot-check large change sets
-- Verify the merge statistics look reasonable
-- Consider the timing (avoid applying during peak usage)
-
----
-
-## Workflow Example
-
-**Scenario:** ISOGG monthly update with 7,537 new nodes
-
-1. Receive notification that change set "isogg-2025-12" is ready
-2. Navigate to Change Sets dashboard
-3. Click the new change set to see details:
- - 7,537 nodes created
- - 2,695 nodes updated
- - 684 ambiguities detected
-4. Click "Start Review"
-5. View the tree preview at `/curator/change-sets/{id}/tree-preview` to understand the structural changes
-6. Click "View Report" to handle the 684 ambiguities
-7. Review each ambiguity:
- - Most are low-risk automatic placements (approve)
- - Some need manual verification (check in tree view)
- - A few should be skipped (data quality issues)
-8. **Create resolutions** for items needing correction:
- - Use REPARENT to fix incorrect parent placements
- - Use EDIT_VARIANTS to add missing SNPs
- - Use MERGE_EXISTING for duplicate nodes
- - Use DEFER for items needing expert research
-9. Return to detail panel
-10. Click "View Full Diff" to spot-check changes
-11. Check `/curator/change-sets/{id}/resolutions` to verify all corrections are in place
-12. Click "Approve All Pending" for remaining items
-13. Click "Apply to Production"
-14. Confirm in the dialog
-15. Done! Check the tree explorer to verify
+1. A change set appears as **READY_FOR_REVIEW** in `/curator/change-sets`.
+2. Open it; read the summary (e.g. *N created, M reparented*) and the diff.
+3. Click **Start Review** (→ UNDER_REVIEW).
+4. Switch to `/curator/reviews` and work the flagged worklist:
+ - **REPARENT** to fix or confirm a parent,
+ - **MERGE_EXISTING** for duplicates,
+ - **DEFER** items needing research.
+5. **Apply** the resolutions from a review item (enacts them via the change-set
+ apply engine).
+6. Back in `/curator/change-sets`, spot-check the diff and **Approve All Pending**.
+7. **Apply** the change set → **APPLIED**.
+8. Verify in the tree explorer (`/ytree` / `/mtree`).
---
## Troubleshooting
-| Symptom | Possible Cause | Solution |
+| Symptom | Possible cause | Solution |
|---------|----------------|----------|
-| "No change sets found" | Filters hiding results | Reset filters to "All Types" and "All Statuses" |
-| "No change sets found" | Missing permissions | Request `tree.version.view` from Admin |
-| "No change sets found" | No recent merges | Check with data team if merges are scheduled |
-| "Apply" button disabled | Change set already applied | Check status — if "Applied", no action needed |
-| "Apply" button disabled | Missing permissions | Request `tree.version.promote` from Admin |
-| "Apply" button disabled | Unresolved ambiguities | View Ambiguity Report and resolve all items |
-| "Discard" button not visible | Missing permissions | Request `tree.version.discard` from Admin |
-| Changes not showing in production | Browser cache | Open in private/incognito window or clear cache |
-| Changes not showing in production | Page not refreshed | Refresh the tree explorer page |
-| Changes not showing in production | Not yet applied | Verify change set status is "Applied" |
-| Tree structure looks wrong | Viewing cached data | Hard refresh (Ctrl+Shift+R / Cmd+Shift+R) |
-| Tree structure looks wrong | Merge had errors | Check merge logs and ambiguity report |
-| Ambiguity count seems high | Large structural changes | Normal for major source updates — review systematically |
-| Resolution not applied | Status still "PENDING" | Check if Apply was run; resolutions apply during promotion |
-| Resolution API returns 400 | Missing required fields | Ensure wipHaplogroupId or wipReparentId is provided |
-| Deferred items still visible | Change set re-applied | Deferred items remain in WIP until manually resolved |
-| Cannot create resolution | Missing permissions | Request `tree.version.review` from Admin |
-
----
-
-## Permissions Summary
-
-| Action | Permission Required |
-|--------|---------------------|
-| View change sets | `tree.version.view` |
-| View resolutions | `tree.version.view` |
-| Review changes | `tree.version.review` |
-| Create resolutions | `tree.version.review` |
-| Cancel resolutions | `tree.version.review` |
-| Apply to production | `tree.version.promote` |
-| Discard change set | `tree.version.discard` |
-
-Contact an administrator if you need additional permissions.
+| "No change sets found" | Status filter hiding results | Reset the status filter |
+| Can't reach the screen | Not a curator | Need the `Curator` / `TreeCurator` / `Admin` role |
+| **Apply** unavailable | Already APPLIED | No action needed |
+| **Apply** unavailable | Wrong status | Apply needs READY_FOR_REVIEW or UNDER_REVIEW (Start Review first) |
+| Resolve returns a notice | Unknown target node name | Use an existing node's exact name |
+| REPARENT / MERGE_EXISTING rejected | No `target` given | REPARENT and MERGE_EXISTING both require a target node name |
+| Deferred items still listed | By design | Deferred items stay in the worklist until resolved |
+| Changes not showing in production | Browser cache / not applied | Hard-refresh; confirm status is APPLIED |
---
-## Related Documentation
+## Related documentation
-- [Tree Versioning System (Technical)](planning/tree-versioning-system.md) - Architecture and implementation details
-- [Conflict Resolution System (Technical)](planning/conflict-resolution-system.md) - Resolution types and data model
-- [Haplogroup Discovery System](planning/haplogroup-discovery-system.md) - How user observations propose new branches
+- [Tree Versioning System (technical)](planning/tree-versioning-system.md) — architecture and data model.
+- [Haplogroup Discovery System](planning/haplogroup-discovery-system.md) — how observations propose new branches.
+- [`rust/README.md`](../rust/README.md) — the curator suite, merge/SNP-graft, and the management API in context.
diff --git a/documents/decoding-us-Y-curator-review.json b/documents/decoding-us-Y-curator-review.json
new file mode 100644
index 00000000..84c96fda
--- /dev/null
+++ b/documents/decoding-us-Y-curator-review.json
@@ -0,0 +1,6963 @@
+{
+ "source": "decoding-us",
+ "dna": "Y_DNA",
+ "summary": {
+ "weak_plurality": 125,
+ "parent_inconsistent": 171,
+ "name_collision": 2,
+ "graft_blocked": 130,
+ "total": 298
+ },
+ "items": [
+ {
+ "node": "Y",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C2b1a2b2b~",
+ "anchor_strength": 0.16666666666666666,
+ "candidates": [
+ {
+ "node": "C2b1a2b2b~",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1c1a1a2a1a1a2b~",
+ "hits": 3
+ },
+ {
+ "node": "I1",
+ "hits": 3
+ },
+ {
+ "node": "O1b1a1a1a1a1b1a1",
+ "hits": 3
+ },
+ {
+ "node": "O1b1a1a1a1b1a1a1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1131,
+ "snps_known_to_foundation": 18,
+ "source_parent": null,
+ "source_parent_status": "(root)",
+ "is_backbone": true
+ },
+ {
+ "node": "A0-T",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "A00",
+ "anchor_strength": 0.9090909090909091,
+ "candidates": [
+ {
+ "node": "A00",
+ "hits": 30
+ },
+ {
+ "node": "J2b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1308,
+ "snps_known_to_foundation": 33,
+ "source_parent": "Y",
+ "source_parent_status": "flag_weak",
+ "is_backbone": true
+ },
+ {
+ "node": "A0",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "A0",
+ "anchor_strength": 0.75,
+ "candidates": [
+ {
+ "node": "A0",
+ "hits": 84
+ },
+ {
+ "node": "D1",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1b1~",
+ "hits": 3
+ },
+ {
+ "node": "G",
+ "hits": 3
+ },
+ {
+ "node": "I1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 2329,
+ "snps_known_to_foundation": 109,
+ "source_parent": "A0-T",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": true
+ },
+ {
+ "node": "A0-FTA5785",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I2a",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "I2a",
+ "hits": 3
+ },
+ {
+ "node": "I2a1b1a1a1a2~",
+ "hits": 3
+ },
+ {
+ "node": "Q1b1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1284,
+ "snps_known_to_foundation": 9,
+ "source_parent": "A0",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "A0-FTA5788",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1a1a1b1a1a1b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a1a1b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 297,
+ "snps_known_to_foundation": 3,
+ "source_parent": "A0-FTA5785",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "A1",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "A00",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "A00",
+ "hits": 3
+ },
+ {
+ "node": "A1",
+ "hits": 3
+ },
+ {
+ "node": "C1a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 573,
+ "snps_known_to_foundation": 9,
+ "source_parent": "A0-T",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": true
+ },
+ {
+ "node": "A1a",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "A00",
+ "anchor_strength": 0.10714285714285714,
+ "candidates": [
+ {
+ "node": "A00",
+ "hits": 3
+ },
+ {
+ "node": "I1a10b2b2~",
+ "hits": 3
+ },
+ {
+ "node": "I1a2a1a1a2b1b~",
+ "hits": 3
+ },
+ {
+ "node": "I2a1a2a1b",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1b1a1a1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3076,
+ "snps_known_to_foundation": 28,
+ "source_parent": "A1",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "A1b",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "A1b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "A1b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 129,
+ "snps_known_to_foundation": 3,
+ "source_parent": "A1",
+ "source_parent_status": "flag_weak",
+ "is_backbone": true
+ },
+ {
+ "node": "A1b1-M10831",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "A1b1a1",
+ "anchor_strength": 0.3917525773195876,
+ "candidates": [
+ {
+ "node": "A1b1a1",
+ "hits": 38
+ },
+ {
+ "node": "A1b1a",
+ "hits": 12
+ },
+ {
+ "node": "A1b1a1a2a1a",
+ "hits": 9
+ },
+ {
+ "node": "A1b1a1a2b~",
+ "hits": 6
+ },
+ {
+ "node": "G2a2a1a5",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 2775,
+ "snps_known_to_foundation": 97,
+ "source_parent": "A1b1",
+ "source_parent_status": "matched→A1b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "A1b1-M9429",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "A1b1b2",
+ "anchor_strength": 0.2608695652173913,
+ "candidates": [
+ {
+ "node": "A1b1b2",
+ "hits": 12
+ },
+ {
+ "node": "A1b1b2b2~",
+ "hits": 7
+ },
+ {
+ "node": "A1b1b2b",
+ "hits": 5
+ },
+ {
+ "node": "A1b1b",
+ "hits": 3
+ },
+ {
+ "node": "I1a1b1a4a2f1a1a7b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 582,
+ "snps_known_to_foundation": 43,
+ "source_parent": "A1b1",
+ "source_parent_status": "matched→A1b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "A1b1-M9427",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "A1b1b2",
+ "anchor_strength": 0.45,
+ "candidates": [
+ {
+ "node": "A1b1b2",
+ "hits": 9
+ },
+ {
+ "node": "A1b1b2b2~",
+ "hits": 5
+ },
+ {
+ "node": "E1b1a1a1a1c1b2a3a1~",
+ "hits": 3
+ },
+ {
+ "node": "H1a2a3~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 527,
+ "snps_known_to_foundation": 20,
+ "source_parent": "A1b1-M9429",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "A1b1-M9431",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "A1b1b2a",
+ "anchor_strength": 0.16279069767441862,
+ "candidates": [
+ {
+ "node": "A1b1b2a",
+ "hits": 7
+ },
+ {
+ "node": "B2b1a2a~",
+ "hits": 3
+ },
+ {
+ "node": "E1b1a1a1a1c1a1a3d6a",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1b2a1b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1063,
+ "snps_known_to_foundation": 43,
+ "source_parent": "A1b1-M9427",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "A1b1-M9428",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "H2",
+ "anchor_strength": 0.3,
+ "candidates": [
+ {
+ "node": "H2",
+ "hits": 3
+ },
+ {
+ "node": "Q1b2b1b2~",
+ "hits": 3
+ },
+ {
+ "node": "B2b3~",
+ "hits": 2
+ },
+ {
+ "node": "I2a2a1a1a2",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 231,
+ "snps_known_to_foundation": 10,
+ "source_parent": "A1b1-M9431",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "A1b1-V193",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "A1b1b2b",
+ "anchor_strength": 0.42028985507246375,
+ "candidates": [
+ {
+ "node": "A1b1b2b",
+ "hits": 29
+ },
+ {
+ "node": "A1b1b2b2~",
+ "hits": 12
+ },
+ {
+ "node": "A1b1b2b3~",
+ "hits": 3
+ },
+ {
+ "node": "C1a1",
+ "hits": 3
+ },
+ {
+ "node": "E1a1a2b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1104,
+ "snps_known_to_foundation": 66,
+ "source_parent": "A1b1-M9427",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "A1b1-FGC40000",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1c1a1a2a1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1c1a1a2a1a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 102,
+ "snps_known_to_foundation": 3,
+ "source_parent": "A1b1-V5912",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "B-M8677",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B2",
+ "anchor_strength": 0.9810924369747899,
+ "candidates": [
+ {
+ "node": "B2",
+ "hits": 467
+ },
+ {
+ "node": "E1b1a1a1a1c1a1a3a1d1b1b1a5~",
+ "hits": 3
+ },
+ {
+ "node": "H1a1a1b",
+ "hits": 3
+ },
+ {
+ "node": "J2b2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 694,
+ "snps_known_to_foundation": 473,
+ "source_parent": "B-M8675",
+ "source_parent_status": "matched→B~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "B-M6529",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "B2b1b1a",
+ "anchor_strength": 0.14285714285714285,
+ "candidates": [
+ {
+ "node": "B2b1b1a",
+ "hits": 3
+ },
+ {
+ "node": "H1a2a3~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a4a3a1b1b1~",
+ "hits": 3
+ },
+ {
+ "node": "O1b1a1a1b1a2",
+ "hits": 3
+ },
+ {
+ "node": "O2a2b1a1a1a1a1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1010,
+ "snps_known_to_foundation": 18,
+ "source_parent": "B-M7104",
+ "source_parent_status": "matched→B2b1b~ (96%)",
+ "is_backbone": false
+ },
+ {
+ "node": "B-M6843",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "B2b1b1b~",
+ "anchor_strength": 0.375,
+ "candidates": [
+ {
+ "node": "B2b1b1b~",
+ "hits": 9
+ },
+ {
+ "node": "C1b1a2b",
+ "hits": 3
+ },
+ {
+ "node": "C2a1b",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1a1a1b1a2a1~",
+ "hits": 3
+ },
+ {
+ "node": "N1a1a1a1a1a1a1a7b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 909,
+ "snps_known_to_foundation": 24,
+ "source_parent": "B-M7104",
+ "source_parent_status": "matched→B2b1b~ (96%)",
+ "is_backbone": false
+ },
+ {
+ "node": "B-Z5058",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B3",
+ "anchor_strength": 0.9916247906197655,
+ "candidates": [
+ {
+ "node": "B3",
+ "hits": 1184
+ },
+ {
+ "node": "C",
+ "hits": 3
+ },
+ {
+ "node": "I1a10b2~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e3e3~",
+ "hits": 3
+ },
+ {
+ "node": "H3a2b",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 1708,
+ "snps_known_to_foundation": 1188,
+ "source_parent": "B-M8675",
+ "source_parent_status": "matched→B~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "B-Z22657",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "B3b~",
+ "anchor_strength": 0.2857142857142857,
+ "candidates": [
+ {
+ "node": "B3b~",
+ "hits": 6
+ },
+ {
+ "node": "G2a1b2a",
+ "hits": 3
+ },
+ {
+ "node": "I1c1b1~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b1~",
+ "hits": 3
+ },
+ {
+ "node": "N1a1a1a1a1a2a1a2e~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 455,
+ "snps_known_to_foundation": 21,
+ "source_parent": "B-Z5058",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "B-BY14692",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "D1a2b",
+ "anchor_strength": 0.13636363636363635,
+ "candidates": [
+ {
+ "node": "D1a2b",
+ "hits": 3
+ },
+ {
+ "node": "E1b1a1a1a1a4~",
+ "hits": 3
+ },
+ {
+ "node": "I2a1a1a1a1a3a~",
+ "hits": 3
+ },
+ {
+ "node": "O2a2a1a2a1",
+ "hits": 3
+ },
+ {
+ "node": "Q2a1a1a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1815,
+ "snps_known_to_foundation": 19,
+ "source_parent": "B",
+ "source_parent_status": "matched→B~ (96%)",
+ "is_backbone": false
+ },
+ {
+ "node": "B-V1019",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a1g1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a1g1",
+ "hits": 2
+ },
+ {
+ "node": "I1a2a1a1d1a3~",
+ "hits": 1
+ },
+ {
+ "node": "I1a3b1a1",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 99,
+ "snps_known_to_foundation": 3,
+ "source_parent": "B-BY14692",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "C-F5621",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C2a",
+ "anchor_strength": 0.4230769230769231,
+ "candidates": [
+ {
+ "node": "C2a",
+ "hits": 165
+ },
+ {
+ "node": "C2a1a2a",
+ "hits": 150
+ },
+ {
+ "node": "C2a1a2",
+ "hits": 30
+ },
+ {
+ "node": "C2a1a",
+ "hits": 14
+ },
+ {
+ "node": "C2a1a2a1a1",
+ "hits": 7
+ }
+ ],
+ "defining_snp_count": 744,
+ "snps_known_to_foundation": 384,
+ "source_parent": "C-CTS93",
+ "source_parent_status": "matched→C2 (98%)",
+ "is_backbone": false
+ },
+ {
+ "node": "C-F3836",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C2b1a1a",
+ "anchor_strength": 0.4375,
+ "candidates": [
+ {
+ "node": "C2b1a1a",
+ "hits": 21
+ },
+ {
+ "node": "C2b1a1a1",
+ "hits": 15
+ },
+ {
+ "node": "C2b1a1",
+ "hits": 12
+ }
+ ],
+ "defining_snp_count": 84,
+ "snps_known_to_foundation": 48,
+ "source_parent": "C-Z1300",
+ "source_parent_status": "matched→C2b1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "C-F14880",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1a1",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 174,
+ "snps_known_to_foundation": 1,
+ "source_parent": "C-A5925",
+ "source_parent_status": "matched→C2b1a1a1b~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "F",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "C",
+ "hits": 3
+ },
+ {
+ "node": "F",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 440,
+ "snps_known_to_foundation": 6,
+ "source_parent": "CF",
+ "source_parent_status": "matched→CF (100%)",
+ "is_backbone": true
+ },
+ {
+ "node": "F1",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a2a2a4b1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I1a2a2a4b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 87,
+ "snps_known_to_foundation": 3,
+ "source_parent": "F",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "GHIJK",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "GHIJK",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "GHIJK",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 9,
+ "snps_known_to_foundation": 6,
+ "source_parent": "F",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": true
+ },
+ {
+ "node": "G-FT344950",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a1a1a1a1a1a1a2a1",
+ "anchor_strength": 0.22727272727272727,
+ "candidates": [
+ {
+ "node": "G2a1a1a1a1a1a1a2a1",
+ "hits": 20
+ },
+ {
+ "node": "G2a1a1a1a1",
+ "hits": 17
+ },
+ {
+ "node": "G2a1a1a1a1a1a",
+ "hits": 9
+ },
+ {
+ "node": "G2a1a1a1a",
+ "hits": 8
+ },
+ {
+ "node": "G2a1a1a1",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 174,
+ "snps_known_to_foundation": 88,
+ "source_parent": "G-Z6616",
+ "source_parent_status": "matched→G2a1a1a (97%)",
+ "is_backbone": false
+ },
+ {
+ "node": "G-Z3065",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 9,
+ "snps_known_to_foundation": 6,
+ "source_parent": "G-PF3331",
+ "source_parent_status": "matched→G2a2b2a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "G-Z41649",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a2b2a1a1b1a1a2b1b1a",
+ "anchor_strength": 0.391304347826087,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1b1a1a2b1b1a",
+ "hits": 9
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a2b1b1a2~",
+ "hits": 6
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a2b1",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a2b1b",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a2b",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 132,
+ "snps_known_to_foundation": 23,
+ "source_parent": "G-CTS35",
+ "source_parent_status": "matched→G2a2b2a1a1b1a1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "G-S12047",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a2b2a1a1b1a1a1",
+ "anchor_strength": 0.47058823529411764,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1b1a1a1",
+ "hits": 8
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a1a",
+ "hits": 6
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 27,
+ "snps_known_to_foundation": 17,
+ "source_parent": "G-CTS2230",
+ "source_parent_status": "matched→G2a2b2a1a1b1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "G-Y38189",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a2b2a1a1a1a1a1a1a1a1",
+ "anchor_strength": 0.28205128205128205,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1a1a1a1a1a1a1",
+ "hits": 11
+ },
+ {
+ "node": "G2a2b2a1a1a1a1a1a1a1a",
+ "hits": 9
+ },
+ {
+ "node": "G2a2b2a1a1a1a1a1a1a1a1a",
+ "hits": 6
+ },
+ {
+ "node": "G2a2b2a1a1a1a1a1a1a",
+ "hits": 5
+ },
+ {
+ "node": "G2a2b2a1a1a1a1a1a1a1",
+ "hits": 5
+ }
+ ],
+ "defining_snp_count": 60,
+ "snps_known_to_foundation": 37,
+ "source_parent": "G-Z6158",
+ "source_parent_status": "matched→G2a2b2a1a1a1a (72%)",
+ "is_backbone": false
+ },
+ {
+ "node": "G-Z27232",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a2b2a1a1c1a1a2a1",
+ "anchor_strength": 0.42857142857142855,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1c1a1a2a1",
+ "hits": 6
+ },
+ {
+ "node": "G2a2b2a1a1c1a1a2a1a",
+ "hits": 5
+ },
+ {
+ "node": "E1b1a1a1a1c1b2a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 11,
+ "source_parent": "G-Z3292",
+ "source_parent_status": "matched→G2a2b2a1a1c1a1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "H-PH24",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "H3b2",
+ "anchor_strength": 0.3,
+ "candidates": [
+ {
+ "node": "H3b2",
+ "hits": 9
+ },
+ {
+ "node": "H3b2a~",
+ "hits": 9
+ },
+ {
+ "node": "E1b1a1",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1a1b2a4c1a1a1a3~",
+ "hits": 3
+ },
+ {
+ "node": "Q1b1a1a1e1b1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 521,
+ "snps_known_to_foundation": 30,
+ "source_parent": "H-Z13871",
+ "source_parent_status": "matched→H3b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "H-Z34945",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C2a1a1b1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "C2a1a1b1",
+ "hits": 3
+ },
+ {
+ "node": "H3b2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 105,
+ "snps_known_to_foundation": 6,
+ "source_parent": "H-PH24",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "H-Y27295",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a1a1a1a1a1a1a1a2a2~",
+ "anchor_strength": 0.42857142857142855,
+ "candidates": [
+ {
+ "node": "G2a1a1a1a1a1a1a1a2a2~",
+ "hits": 3
+ },
+ {
+ "node": "H1a2a1~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1a2",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 423,
+ "snps_known_to_foundation": 7,
+ "source_parent": "H-Z13996",
+ "source_parent_status": "matched→H1a2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "H-Y25630",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C1a2a",
+ "anchor_strength": 0.25,
+ "candidates": [
+ {
+ "node": "C1a2a",
+ "hits": 3
+ },
+ {
+ "node": "H3a1",
+ "hits": 3
+ },
+ {
+ "node": "I1a2b4b1~",
+ "hits": 3
+ },
+ {
+ "node": "L1a1b3c~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 526,
+ "snps_known_to_foundation": 12,
+ "source_parent": "H-Z13966",
+ "source_parent_status": "matched→H1a2a (98%)",
+ "is_backbone": false
+ },
+ {
+ "node": "H-Z34660",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "H1a1a4b2a1",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "H1a1a4b2a1",
+ "hits": 6
+ },
+ {
+ "node": "E1b1a1",
+ "hits": 3
+ },
+ {
+ "node": "H1a1a4b2a1b~",
+ "hits": 3
+ },
+ {
+ "node": "I1",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a3b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 147,
+ "snps_known_to_foundation": 18,
+ "source_parent": "H-Z14448",
+ "source_parent_status": "matched→H1a1a4b2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "H-FTA9381",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C2a",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "C2a",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1a1a1a1a1a5b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 189,
+ "snps_known_to_foundation": 6,
+ "source_parent": "H-FT282446",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "H-FT327094",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G1a1a2b1c",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "G1a1a2b1c",
+ "hits": 3
+ },
+ {
+ "node": "O1b1a1a1a1b1b1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 203,
+ "snps_known_to_foundation": 6,
+ "source_parent": "H-FT313923",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "L-Z20500",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "L1a1b3a1a1",
+ "anchor_strength": 0.6666666666666666,
+ "candidates": [
+ {
+ "node": "L1a1b3a1a1",
+ "hits": 2
+ },
+ {
+ "node": "L1a1b3a1a1a~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "L-Z5933",
+ "source_parent_status": "matched→L1a1b3a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "L-FT178620",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "D1a1a1a1a1b",
+ "anchor_strength": 0.25,
+ "candidates": [
+ {
+ "node": "D1a1a1a1a1b",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a2c",
+ "hits": 3
+ },
+ {
+ "node": "I2a1a1a",
+ "hits": 3
+ },
+ {
+ "node": "I2a1a1a3~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 327,
+ "snps_known_to_foundation": 9,
+ "source_parent": "L-Z20336",
+ "source_parent_status": "matched→L1a1 (95%)",
+ "is_backbone": false
+ },
+ {
+ "node": "T-Z709",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "T1a1a1b",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "T1a1a1b",
+ "hits": 3
+ },
+ {
+ "node": "T1a1a1b2",
+ "hits": 3
+ },
+ {
+ "node": "T1a1a1b2b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 18,
+ "snps_known_to_foundation": 9,
+ "source_parent": "T-FGC3954",
+ "source_parent_status": "matched→T1a1 (57%)",
+ "is_backbone": false
+ },
+ {
+ "node": "T-CTS934",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "T1a1a1b2b2b1a",
+ "anchor_strength": 0.41379310344827586,
+ "candidates": [
+ {
+ "node": "T1a1a1b2b2b1a",
+ "hits": 12
+ },
+ {
+ "node": "T1a1a1b2b2",
+ "hits": 11
+ },
+ {
+ "node": "Q1b1a1a1h1",
+ "hits": 3
+ },
+ {
+ "node": "T1a1a1b2b2b1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 69,
+ "snps_known_to_foundation": 29,
+ "source_parent": "T-Z709",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "T-B251",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1b1a1a2b2a1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "Q1b1a1a2b2a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 132,
+ "snps_known_to_foundation": 3,
+ "source_parent": "T-CTS934",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "T-M11045",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1a1a1c1b2a~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1b1a1a1c1b2a~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b6b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 108,
+ "snps_known_to_foundation": 6,
+ "source_parent": "T-CTS6280",
+ "source_parent_status": "matched→T1a1a1b2b2b1a1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "T-Y5289",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1b1a1a1a1c3a2~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1b1a1a1a1c3a2~",
+ "hits": 3
+ },
+ {
+ "node": "T1a1a1b2b2b1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 15,
+ "snps_known_to_foundation": 6,
+ "source_parent": "T-A22205",
+ "source_parent_status": "matched→T1a1a1b2b2b1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "T-CTS629",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "T1a1a1b2b2b1a1a1c",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "T1a1a1b2b2b1a1a1c",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 96,
+ "snps_known_to_foundation": 3,
+ "source_parent": "T-Y5289",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "T-FGC29101",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "T1a2a~",
+ "anchor_strength": 0.4186046511627907,
+ "candidates": [
+ {
+ "node": "T1a2a~",
+ "hits": 36
+ },
+ {
+ "node": "T1a2b",
+ "hits": 33
+ },
+ {
+ "node": "T1a2b1",
+ "hits": 6
+ },
+ {
+ "node": "C2a1a2a",
+ "hits": 3
+ },
+ {
+ "node": "Q2b2a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 186,
+ "snps_known_to_foundation": 86,
+ "source_parent": "T",
+ "source_parent_status": "matched→T (57%)",
+ "is_backbone": false
+ },
+ {
+ "node": "K2a",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "NO",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "NO",
+ "hits": 18
+ }
+ ],
+ "defining_snp_count": 21,
+ "snps_known_to_foundation": 18,
+ "source_parent": "K-M526",
+ "source_parent_status": "novel",
+ "is_backbone": true
+ },
+ {
+ "node": "O-FT319264",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "H3b",
+ "anchor_strength": 0.6666666666666666,
+ "candidates": [
+ {
+ "node": "H3b",
+ "hits": 6
+ },
+ {
+ "node": "E1b1a1a1a1c1a1a3d6a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 58,
+ "snps_known_to_foundation": 9,
+ "source_parent": "O-Z23867",
+ "source_parent_status": "matched→O1b1a1a1a1b1b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-ACT740",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1a1b2a4c1~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1b1a1b2a4c1~",
+ "hits": 3
+ },
+ {
+ "node": "O2a2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 215,
+ "snps_known_to_foundation": 6,
+ "source_parent": "O-P201",
+ "source_parent_status": "matched→O2a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-MF654042",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "O2a2a1a",
+ "anchor_strength": 0.7,
+ "candidates": [
+ {
+ "node": "O2a2a1a",
+ "hits": 21
+ },
+ {
+ "node": "O2a2a1",
+ "hits": 9
+ }
+ ],
+ "defining_snp_count": 249,
+ "snps_known_to_foundation": 30,
+ "source_parent": "O-ACT740",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "O-Z25268",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3d6a",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3d6a",
+ "hits": 1
+ },
+ {
+ "node": "G2a2a1a2a1",
+ "hits": 1
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b1a1b2b",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 207,
+ "snps_known_to_foundation": 3,
+ "source_parent": "O-Z25253",
+ "source_parent_status": "matched→O2a2a1a2a1b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-FT321875",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "O2a2a1a2a1a1",
+ "anchor_strength": 0.5384615384615384,
+ "candidates": [
+ {
+ "node": "O2a2a1a2a1a1",
+ "hits": 7
+ },
+ {
+ "node": "I2",
+ "hits": 3
+ },
+ {
+ "node": "O2a2a1a2a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 69,
+ "snps_known_to_foundation": 13,
+ "source_parent": "O-Z25253",
+ "source_parent_status": "matched→O2a2a1a2a1b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-M2775",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2c1a2b2b1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a2b2b1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 16,
+ "snps_known_to_foundation": 3,
+ "source_parent": "O-FT323782",
+ "source_parent_status": "matched→O2a2a1a2a1a1a2 (73%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-F6280",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "O2a2b1a2a1a1b1b2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "O2a2b1a2a1a1b1b2",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 4,
+ "snps_known_to_foundation": 1,
+ "source_parent": "O-CTS3763",
+ "source_parent_status": "matched→O2a2b1a2a1a1b1b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-PF3228",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a2b1c1~",
+ "anchor_strength": 0.75,
+ "candidates": [
+ {
+ "node": "I1a2b1c1~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 255,
+ "snps_known_to_foundation": 4,
+ "source_parent": "O-Z25925",
+ "source_parent_status": "matched→O2a2b1a1a1a4a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-F14479",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "O2a2b1a1a1a4a2a1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "O2a2b1a1a1a4a2a1a1",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 5,
+ "snps_known_to_foundation": 1,
+ "source_parent": "O-CP086569.2:29543615 G->A",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "O-Z31492",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "O2a2b1a1a1a3a1",
+ "anchor_strength": 0.42857142857142855,
+ "candidates": [
+ {
+ "node": "O2a2b1a1a1a3a1",
+ "hits": 9
+ },
+ {
+ "node": "O2a2b1a1a1a3a1a",
+ "hits": 9
+ },
+ {
+ "node": "G1a1a1b2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 198,
+ "snps_known_to_foundation": 21,
+ "source_parent": "O-F14249",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "O-F14203",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "D2*",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "D2*",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 10,
+ "snps_known_to_foundation": 2,
+ "source_parent": "O-Z31492",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "O-F2",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "O2a1b1a1a1",
+ "anchor_strength": 0.4117647058823529,
+ "candidates": [
+ {
+ "node": "O2a1b1a1a1",
+ "hits": 49
+ },
+ {
+ "node": "O2a1b1a1a1a",
+ "hits": 21
+ },
+ {
+ "node": "O2a1b1a1a",
+ "hits": 18
+ },
+ {
+ "node": "O2a1b1a1a1a1",
+ "hits": 16
+ },
+ {
+ "node": "O2a1b1a1",
+ "hits": 12
+ }
+ ],
+ "defining_snp_count": 224,
+ "snps_known_to_foundation": 116,
+ "source_parent": "O-CP086569.2:5919079 T->C",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "O-M5420",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1",
+ "hits": 3
+ },
+ {
+ "node": "O2a1b1a1a1a1e",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 5,
+ "snps_known_to_foundation": 3,
+ "source_parent": "O-F2",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "O-A4899",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "A00",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "A00",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 11,
+ "snps_known_to_foundation": 3,
+ "source_parent": "O-F17",
+ "source_parent_status": "matched→O2a1b1a1a1a1a1a1 (91%)",
+ "is_backbone": false
+ },
+ {
+ "node": "O-MF190873",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 10,
+ "snps_known_to_foundation": 1,
+ "source_parent": "O-FT38156",
+ "source_parent_status": "matched→O2a1b (92%)",
+ "is_backbone": false
+ },
+ {
+ "node": "N",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "N~",
+ "anchor_strength": 0.4516765285996055,
+ "candidates": [
+ {
+ "node": "N~",
+ "hits": 229
+ },
+ {
+ "node": "N",
+ "hits": 197
+ },
+ {
+ "node": "N1",
+ "hits": 75
+ },
+ {
+ "node": "N1a1a1a1a1a2a",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b2a1a2c2d2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1212,
+ "snps_known_to_foundation": 501,
+ "source_parent": "NO",
+ "source_parent_status": "matched→NO1 (84%)",
+ "is_backbone": true
+ },
+ {
+ "node": "N-Z19801",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "N1a1a1a1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "N1a1a1a1",
+ "hits": 24
+ },
+ {
+ "node": "N1a1a1a1a",
+ "hits": 24
+ }
+ ],
+ "defining_snp_count": 65,
+ "snps_known_to_foundation": 33,
+ "source_parent": "N-Z4745",
+ "source_parent_status": "matched→N1a1a1a1a (75%)",
+ "is_backbone": false
+ },
+ {
+ "node": "N-Z1922",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "N1a1a1a1a2a1",
+ "anchor_strength": 0.38095238095238093,
+ "candidates": [
+ {
+ "node": "N1a1a1a1a2a1",
+ "hits": 8
+ },
+ {
+ "node": "N1a1a1a1a2",
+ "hits": 7
+ },
+ {
+ "node": "N1a1a1a1a2a~",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 45,
+ "snps_known_to_foundation": 21,
+ "source_parent": "N-CTS27",
+ "source_parent_status": "matched→N1a1a1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "N-Y262049",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a1c2b2a1b6b",
+ "anchor_strength": 0.6666666666666666,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b6b",
+ "hits": 6
+ },
+ {
+ "node": "N1a1a1a1a2a1a1a1a1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 22,
+ "snps_known_to_foundation": 9,
+ "source_parent": "N-Z19826",
+ "source_parent_status": "matched→N1a1a1a1a2a1a1a1a1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "N-Z19831",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "N1a1a1a1a2a1a1a1a1a1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "N1a1a1a1a2a1a1a1a1a1a1a",
+ "hits": 7
+ }
+ ],
+ "defining_snp_count": 17,
+ "snps_known_to_foundation": 7,
+ "source_parent": "N-Y262049",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "N-Z8029",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a1b1a1c1~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "I1a1b1a1c1~",
+ "hits": 3
+ },
+ {
+ "node": "N1b1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "N-ACT2487",
+ "source_parent_status": "matched→N1b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "N-Z19706",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "N1b1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "N1b1a1",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 20,
+ "snps_known_to_foundation": 6,
+ "source_parent": "N-Z8029",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "N-PF3228",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5",
+ "hits": 1
+ },
+ {
+ "node": "N1a1a1a1a2a1a",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 25,
+ "snps_known_to_foundation": 2,
+ "source_parent": "N-FT324649",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "S-Z41931",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "D1a2a",
+ "anchor_strength": 0.25,
+ "candidates": [
+ {
+ "node": "D1a2a",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2b1a1a1a1a1",
+ "hits": 3
+ },
+ {
+ "node": "R",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a1d1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 1212,
+ "snps_known_to_foundation": 12,
+ "source_parent": "S",
+ "source_parent_status": "matched→S (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "P",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I2a2a1a1a2",
+ "anchor_strength": 0.375,
+ "candidates": [
+ {
+ "node": "I2a2a1a1a2",
+ "hits": 3
+ },
+ {
+ "node": "P",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 267,
+ "snps_known_to_foundation": 8,
+ "source_parent": "K2b",
+ "source_parent_status": "novel",
+ "is_backbone": true
+ },
+ {
+ "node": "Q-FT310416",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "Q2b1a1",
+ "anchor_strength": 0.475,
+ "candidates": [
+ {
+ "node": "Q2b1a1",
+ "hits": 19
+ },
+ {
+ "node": "Q2b1",
+ "hits": 12
+ },
+ {
+ "node": "Q2b1a",
+ "hits": 6
+ },
+ {
+ "node": "Q2b1a1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 170,
+ "snps_known_to_foundation": 40,
+ "source_parent": "Q-YP748",
+ "source_parent_status": "matched→Q2b (98%)",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-BZ3056",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "Q2a",
+ "anchor_strength": 0.33916083916083917,
+ "candidates": [
+ {
+ "node": "Q2a",
+ "hits": 97
+ },
+ {
+ "node": "Q2a1a",
+ "hits": 59
+ },
+ {
+ "node": "Q2a1",
+ "hits": 46
+ },
+ {
+ "node": "Q2a1a3",
+ "hits": 39
+ },
+ {
+ "node": "Q2a1a3a1a~",
+ "hits": 36
+ }
+ ],
+ "defining_snp_count": 404,
+ "snps_known_to_foundation": 283,
+ "source_parent": "Q-L612",
+ "source_parent_status": "matched→Q2 (96%)",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-F4531",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "Q1a2",
+ "anchor_strength": 0.36829268292682926,
+ "candidates": [
+ {
+ "node": "Q1a2",
+ "hits": 151
+ },
+ {
+ "node": "Q1a2a2a~",
+ "hits": 120
+ },
+ {
+ "node": "Q1a2a",
+ "hits": 72
+ },
+ {
+ "node": "Q1a2a2~",
+ "hits": 55
+ },
+ {
+ "node": "Q1a2a2a1~",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 712,
+ "snps_known_to_foundation": 407,
+ "source_parent": "Q-Y663",
+ "source_parent_status": "matched→Q1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-FT6742",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C2",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "C2",
+ "hits": 3
+ },
+ {
+ "node": "Q1a1a2",
+ "hits": 3
+ },
+ {
+ "node": "Q1a1a2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 162,
+ "snps_known_to_foundation": 9,
+ "source_parent": "Q-Y683",
+ "source_parent_status": "matched→Q1a1a (98%)",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-Z32422",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1b1a2a",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "C1b1a2a",
+ "hits": 3
+ },
+ {
+ "node": "Q1b1a1a1h",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "Q-M826",
+ "source_parent_status": "matched→Q1b1a1a (69%)",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-B35",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1b1a1a1h1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "Q1b1a1a1h1",
+ "hits": 88
+ }
+ ],
+ "defining_snp_count": 219,
+ "snps_known_to_foundation": 88,
+ "source_parent": "Q-Z32422",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-FT333378",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "Q1b1a1a1j1b~",
+ "anchor_strength": 0.38461538461538464,
+ "candidates": [
+ {
+ "node": "Q1b1a1a1j1b~",
+ "hits": 15
+ },
+ {
+ "node": "Q1b1a1a1k1b~",
+ "hits": 15
+ },
+ {
+ "node": "B2b1a2~",
+ "hits": 3
+ },
+ {
+ "node": "H1a2b1b",
+ "hits": 3
+ },
+ {
+ "node": "Q1a2a2b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 267,
+ "snps_known_to_foundation": 24,
+ "source_parent": "Q-Z19429",
+ "source_parent_status": "matched→Q1b1a1a1k1 (75%)",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-CTS193",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "Q1b1a1a1e1b~",
+ "anchor_strength": 0.4117647058823529,
+ "candidates": [
+ {
+ "node": "Q1b1a1a1e1b~",
+ "hits": 21
+ },
+ {
+ "node": "Q1b1a1a1e1b1a~",
+ "hits": 15
+ },
+ {
+ "node": "Q1b1a1a1e1b1~",
+ "hits": 9
+ },
+ {
+ "node": "B3",
+ "hits": 3
+ },
+ {
+ "node": "J2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 304,
+ "snps_known_to_foundation": 48,
+ "source_parent": "Q-M825",
+ "source_parent_status": "matched→Q1b1a1a1e1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-YP4716",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "N1a1a1a1a1a",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "N1a1a1a1a1a",
+ "hits": 3
+ },
+ {
+ "node": "Q1b1a1a1e1a",
+ "hits": 3
+ },
+ {
+ "node": "Q1b1a1a1m",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 26,
+ "snps_known_to_foundation": 9,
+ "source_parent": "Q-FGC8093",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "Q-Y28017",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1b1a1a1m2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "Q1b1a1a1m2~",
+ "hits": 15
+ }
+ ],
+ "defining_snp_count": 30,
+ "snps_known_to_foundation": 15,
+ "source_parent": "Q-YP4716",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC46630",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "A00",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "A00",
+ "hits": 3
+ },
+ {
+ "node": "R2a2b2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 160,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R2-FGC51793",
+ "source_parent_status": "matched→R2a2b2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-Z29192",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 111,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-Y1332",
+ "source_parent_status": "matched→R2a2b1b2b3 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-Z29227",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J2a2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J2a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 44,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-V2434",
+ "source_parent_status": "matched→R2a2b1b2b3b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC18155",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1a2a1a1b3b",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "I2a1a2a1a1b3b",
+ "hits": 3
+ },
+ {
+ "node": "R2a2b1b2b3b2a1b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 150,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-V2947",
+ "source_parent_status": "matched→R2a2b1b2b3b2a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-A27695",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J2b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J2b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 196,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-V4569",
+ "source_parent_status": "matched→R2a2b1b2b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-S10301",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2a1a2a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2a1a2a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-Z29284",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC17618",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1b1b~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "Q1b1b~",
+ "hits": 3
+ },
+ {
+ "node": "R2a2b1b1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 75,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R2-V1180",
+ "source_parent_status": "matched→R2a2b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-Y61448",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1a1a1b2~",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "E1b1b1a1a1b2~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e1a2~",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b1a3a1a2d~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 159,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R2-FGC17618",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC17629",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C1b2b",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "C1b2b",
+ "hits": 3
+ },
+ {
+ "node": "N1a1a1a1a",
+ "hits": 3
+ },
+ {
+ "node": "R2a2b1b1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 156,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R2-FGC17618",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC57535",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R2a2b1b1a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R2a2b1b1a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-FGC17629",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC17661",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R2a2b1b1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R2a2b1b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-FGC17629",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-Y17972",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1a1b2",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "I2a1a1b2",
+ "hits": 3
+ },
+ {
+ "node": "R2a2a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 57,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R2-FGC13192",
+ "source_parent_status": "matched→R2a2a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC13185",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "A00",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "A00",
+ "hits": 3
+ },
+ {
+ "node": "E1a2b1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 138,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R2-Y17972",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC13210",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R2a2a1a1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R2a2a1a1a1",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R2-FGC13185",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R2-FGC61415",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "O2a2b1a1a1d2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "O2a2b1a1a1d2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 69,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R2-FGC13203",
+ "source_parent_status": "matched→R2a2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-PF6234",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1a1",
+ "anchor_strength": 0.36101083032490977,
+ "candidates": [
+ {
+ "node": "R1a1",
+ "hits": 100
+ },
+ {
+ "node": "R1a1a",
+ "hits": 90
+ },
+ {
+ "node": "R1a1a1",
+ "hits": 76
+ },
+ {
+ "node": "I1a2a1a1a3a2~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4c4a1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 341,
+ "snps_known_to_foundation": 266,
+ "source_parent": "R1a",
+ "source_parent_status": "matched→R1a (97%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-S2846",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1a1a1a1b1a~",
+ "anchor_strength": 0.47368421052631576,
+ "candidates": [
+ {
+ "node": "R1a1a1a1b1a~",
+ "hits": 27
+ },
+ {
+ "node": "R1a1a1a1",
+ "hits": 9
+ },
+ {
+ "node": "R1a1a1a~",
+ "hits": 9
+ },
+ {
+ "node": "R1a1a1a1b1~",
+ "hits": 6
+ },
+ {
+ "node": "R1a1a1a1b~",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 80,
+ "snps_known_to_foundation": 57,
+ "source_parent": "R1a-PF6234",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-FGC21102",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1b1~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "J1b1~",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1a1b1a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 45,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1a-S2846",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-Y60196",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1a1b1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1a1b1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 114,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1a-Y878",
+ "source_parent_status": "matched→R1a1a1b2a1a2c1d2~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-Z31469",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a1a2a2~",
+ "anchor_strength": 0.25,
+ "candidates": [
+ {
+ "node": "G2a1a2a2~",
+ "hits": 3
+ },
+ {
+ "node": "I2a2b",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b1a2a3a1b~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2b1a1a1d1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 81,
+ "snps_known_to_foundation": 12,
+ "source_parent": "R1a-Y878",
+ "source_parent_status": "matched→R1a1a1b2a1a2c1d2~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-A24429",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1a1",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "E1b1b1a1",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b2a1a2c2d5a~",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b2a1a2c2d5~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 81,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R1a-Y944",
+ "source_parent_status": "matched→R1a1a1b2a1a2c2d~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-FGC7401",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1a1a1b2a1a2b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a1b2a1a2b~",
+ "hits": 9
+ }
+ ],
+ "defining_snp_count": 9,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R1a-FGC7398",
+ "source_parent_status": "matched→R1a1a1b2a1a2a~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-FTA9496",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b2a1a6d1a1b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1b2a1a6d1a1b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 96,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1a-Y929",
+ "source_parent_status": "matched→R1a1a1b2a1a1a1f1~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-FT310821",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 48,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1a-Y20746",
+ "source_parent_status": "matched→R1a1a1b2a2a1c~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-M12427",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3e1~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3e1~",
+ "hits": 3
+ },
+ {
+ "node": "Q1b1b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 66,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1a-M12441",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-Y144479",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1a1a1b1a2b3a3a2h1b1b~",
+ "anchor_strength": 0.625,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a2b3a3a2h1b1b~",
+ "hits": 15
+ },
+ {
+ "node": "I1a2b1c1~",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b1a2b3a3a2g2c1~",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b1a2b3a3a2g2c~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 75,
+ "snps_known_to_foundation": 21,
+ "source_parent": "R1a-FGC10352",
+ "source_parent_status": "matched→R1a1a1b1a2b3a3a2g2~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-FGC11896",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1a1a1b1a3a1a1a1a1b2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a3a1a1a1a1b2~",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 2,
+ "source_parent": "R1a-FGC11888",
+ "source_parent_status": "matched→R1a1a1b1a3a1a1a (50%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-FGC55633",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1a1a1b1a3a1a3c~",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a3a1a3c~",
+ "hits": 6
+ },
+ {
+ "node": "R1a1a1b1a3a1a3~",
+ "hits": 6
+ },
+ {
+ "node": "R1a1a1b1a3a1a3a",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b1a3a1a3c1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 21,
+ "snps_known_to_foundation": 18,
+ "source_parent": "R1a-CTS3438",
+ "source_parent_status": "matched→R1a1a1b1a3a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1a-FGC33255",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1a1a1b1a3a2b",
+ "anchor_strength": 0.42857142857142855,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a3a2b",
+ "hits": 27
+ },
+ {
+ "node": "R1a1a1b1a3a2b2b~",
+ "hits": 15
+ },
+ {
+ "node": "R1a1a1b1a3a2b2b1b~",
+ "hits": 6
+ },
+ {
+ "node": "R1a1a1b1a3a2b2~",
+ "hits": 6
+ },
+ {
+ "node": "J2b2a1a1a1a1a2a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 66,
+ "snps_known_to_foundation": 54,
+ "source_parent": "R1a-S5084",
+ "source_parent_status": "matched→R1a1a1b1a3a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY19023",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a1c2f~",
+ "anchor_strength": 0.75,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a1c2f~",
+ "hits": 6
+ },
+ {
+ "node": "R1b1a1b1a1a1c2d1",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 96,
+ "snps_known_to_foundation": 8,
+ "source_parent": "R1b-FGC8512",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2d (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A5587",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1a2a1b1b~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1a2a1b1b~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2c1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 72,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-S1731",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a (88%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS832",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2c1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 33,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S11136",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC51313",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1b1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1b1a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 48,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Y21408",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A17378",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C2b1a2a2b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C2b1a2a2b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 21,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Z5054",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a1a1e3 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT156503",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1a1a1b1a2b3a1b2a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a2b3a1b2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 42,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-A17378",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS349",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1a3~",
+ "anchor_strength": 0.42857142857142855,
+ "candidates": [
+ {
+ "node": "E1a3~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2b1a3~",
+ "hits": 3
+ },
+ {
+ "node": "C1a1",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 65,
+ "snps_known_to_foundation": 7,
+ "source_parent": "R1b-Z80",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a1a1b2b (50%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC84309",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1b2a2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a1b2a2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 51,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S10353",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S5235",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1a1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "Q1a1",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b1a4b2a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-S5231",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b1a4b2 (50%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S5627",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a1c2b2a1b1a4b2a2c",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b1a4b2a2c",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S5235",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY25300",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1a1a1a1a2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a1a1a1a1a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 41,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S20321",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b1a4b1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS604",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "B2b1a2b2a~",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "B2b1a2b2a~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b1a1a2a",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b1a1a2a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 60,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R1b-Z8175",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b1a1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC61369",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "H3b1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "H3b1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 81,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S271",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT289143",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q2a1c2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "Q2a1c2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 51,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-L48",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC30517",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a1c2a1d2a2",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a1c2a1d2a2",
+ "hits": 6
+ },
+ {
+ "node": "R1b1a1b1a1a1c2a1d2a3b1",
+ "hits": 6
+ },
+ {
+ "node": "R1b1a1b1a1a1c2a1d2a3",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2a1d2a3b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 27,
+ "snps_known_to_foundation": 18,
+ "source_parent": "R1b-S9787",
+ "source_parent_status": "matched→R1b1a1b1a1a1c2a1d2 (50%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC13326",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a1c1a2b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a1c1a2b",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 2,
+ "source_parent": "R1b-S1785",
+ "source_parent_status": "matched→R1b1a1b1a1a1c1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-YFS154845",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a1c1a2a1",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a1c1a2a1",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c1a2b1a",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c1a2b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 75,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R1b-S25234",
+ "source_parent_status": "matched→R1b1a1b1a1a1c1a2b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC23197",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a1a1a1b1a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I1a1a1a1b1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 105,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S1785",
+ "source_parent_status": "matched→R1b1a1b1a1a1c1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S18823",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1c1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1c1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 9,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S265",
+ "source_parent_status": "matched→R1b1a1b1a1a1c1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y18881",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a2a1a1c2~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "I1a2a1a1c2~",
+ "hits": 3
+ },
+ {
+ "node": "L1a1b3a1a1a~",
+ "hits": 2
+ },
+ {
+ "node": "R1b1a1b1a1a1c2c",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 69,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-S263",
+ "source_parent_status": "matched→R1b1a1b1a1a1c (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S5676",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "B2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 33,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S3207",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A14184",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b1a2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1b1a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 27,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S5676",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-SK2102",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I2a1b1a2a2a1",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "I2a1b1a2a2a1",
+ "hits": 3
+ },
+ {
+ "node": "I2a2a1b1",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1d2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 94,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R1b-FGC396",
+ "source_parent_status": "matched→R1b1a1b1a1a1d (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY700",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "N1a1a1a1a1a2a1a2i1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "N1a1a1a1a1a2a1a2i1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 108,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-DF63",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY7771",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C2b1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C2b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 117,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FGC36422",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1b2a1a1a (86%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC17160",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "O1a1b",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "O1a1b",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1b1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 89,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-A91",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y23251",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a1b1a4a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I1a1b1a4a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 54,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Y23438",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC5496",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2c1a",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a6a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-FGC5494",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a6 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT299995",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 110,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Z17967",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Z18090",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B3",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "B3",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 123,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-S9294",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A224",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2c1a1a1a1a1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a1a1a1a1",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a1a1a1a1a1a4a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-A223",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a1a1a1a1a1a4 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y16739",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1b2b1a2e~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "Q1b2b1a2e~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FGC52372",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-ZS8379",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5a1e3f~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e3f~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S6151",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A13155",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b2a1b1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1b2a1b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-A694",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a1a1a1a1a1a2a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Z17592",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 81,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Z2961",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a1a1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS78",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1a1b2a4b1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1a1b2a4b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 32,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-CTS360",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC46820",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E2b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E2b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 108,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FGC42321",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A97",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2c1a1d5a",
+ "anchor_strength": 0.46153846153846156,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a1d5a",
+ "hits": 18
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a1d5",
+ "hits": 9
+ },
+ {
+ "node": "R1b1a1b1a1a1c2a1d2",
+ "hits": 6
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a1d5a1~",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 57,
+ "snps_known_to_foundation": 33,
+ "source_parent": "R1b-A98",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a1d5 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S3808",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1a1a",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1a1a",
+ "hits": 3
+ },
+ {
+ "node": "N~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 122,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-V1246",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC13776",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3d6a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3d6a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 52,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FGC13783",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a1j (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY64238",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b2b2b1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1b2b2b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 165,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-DC268",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC18023",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1a1a1b1a3a1a2e2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a3a1a2e2~",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 39,
+ "snps_known_to_foundation": 2,
+ "source_parent": "R1b-FGC18022",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a3b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A9040",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "N1b1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "N1b1",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b1a2b3a1c~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 155,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-Z17992",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC49407",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1a1a1b1a1a1c1c1b1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a1a1c1c1b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 25,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-A286",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a3b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A6518",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1a1a1a1a1a1e3b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a1a1a1a1a1a1e3b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 50,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-A212",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a3a2a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y34442",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1b1a2b1a1b1a2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a1b1a2b1a1b1a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-BY93490",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC29280",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2c1a3a2a1a2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a3a2a1a2a",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 9,
+ "snps_known_to_foundation": 1,
+ "source_parent": "R1b-CP086569.2:18958846 G->A",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Z2358",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "J1a2a~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a5d3a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-Z17981",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a3a2a1a2c1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY207095",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B2b1a2a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "B2b1a2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 21,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-BY208692",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A155",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2c1a3a2a1b1a2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a3a2a1b1a2a",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 1,
+ "source_parent": "R1b-A89",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a3a2a1b1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC65809",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1b1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 111,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-A9871",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S15280",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2c1a4b5a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a4b5a2",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 1,
+ "source_parent": "R1b-S7898",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a4b5a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S841",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "C1a2a2",
+ "anchor_strength": 0.375,
+ "candidates": [
+ {
+ "node": "C1a2a2",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a4b",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a4b4a",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 83,
+ "snps_known_to_foundation": 8,
+ "source_parent": "R1b-Z253",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a4b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Z17685",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2c1a4b4a1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a4b4a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S841",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC3222",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2c1a4b3a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a4b3a",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 84,
+ "snps_known_to_foundation": 2,
+ "source_parent": "R1b-Z253",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a4b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Z17673",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2c1a4b2c1a1",
+ "anchor_strength": 0.4,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a4b2c1a1",
+ "hits": 6
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a4b2c1a1b",
+ "hits": 6
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a4b2c1a1b1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 60,
+ "snps_known_to_foundation": 15,
+ "source_parent": "R1b-L1066",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY17724",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q2a1a4b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "Q2a1a4b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 48,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-L159",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A6903",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1a2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1a2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-chrY:9914120 A->G",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y139017",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1b2a2a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1b2a2a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 41,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-A6903",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY9596",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1b1b1a~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "Q1b1b1a~",
+ "hits": 9
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a5b1a1a1a1b",
+ "hits": 9
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R1b-S281",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a5b1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-S953",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a4",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "G2a2b2a4",
+ "hits": 3
+ },
+ {
+ "node": "H1a1a4a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 48,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-S956",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a5d3a1a~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-F15205",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "B2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 75,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FGC33056",
+ "source_parent_status": "matched→R1b1a1b1a1a2c1a5c2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Z205",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2a1b1a1a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2a1b1a1a~",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 2,
+ "source_parent": "R1b-Z264",
+ "source_parent_status": "matched→R1b1a1b1a1a2a1b1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC65779",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B2b1a2a~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "B2b1a2a~",
+ "hits": 3
+ },
+ {
+ "node": "I1c1c1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 80,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-Z205",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS422",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "D1a1a1b",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "D1a1a1b",
+ "hits": 3
+ },
+ {
+ "node": "E1b1a1a1a1c1a1a3c2b2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 104,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-Z205",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT294551",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "O2a1b1a1a1a1a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "O2a1b1a1a1a1a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 99,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FGC35927",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y31334",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1a2a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a1a2a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 47,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-CTS7359",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y151131",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E2b1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E2b1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Z298",
+ "source_parent_status": "matched→R1b1a1b1a1a2a1a1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT294663",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2b1b2a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2b1b2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 41,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Y151131",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-chrY:8458210 C->T",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2a1c~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2a1c~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 71,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Z29668",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS609",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5a1e4a6i12~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a6i12~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 42,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-PH312",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC20540",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a1b1a1e2d1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I1a1b1a1e2d1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 45,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-BY21072",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-V2059",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2a5",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2a5",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 1,
+ "source_parent": "R1b-DF27",
+ "source_parent_status": "matched→R1b1a1b1a1a2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT19025",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1c1a1a2a2a4a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1c1a1a2a2a4a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 78,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-CTS8001",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS129",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2a7~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2a7~",
+ "hits": 7
+ }
+ ],
+ "defining_snp_count": 96,
+ "snps_known_to_foundation": 7,
+ "source_parent": "R1b-V2059",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y23650",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2a2a1a1a1b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a1a1a1b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-DF27",
+ "source_parent_status": "matched→R1b1a1b1a1a2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC95121",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a1a1a1a1a1a1a1a1a3a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a1a1a1a1a1a1a1a1a3a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 77,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FGC42062",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y14468",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1a2a1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a1a2a1a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 41,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Z2548",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY63832",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2a7~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2a7~",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 15,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-Z2571",
+ "source_parent_status": "matched→R1b1a1b1a1a2a6 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y150919",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2c1a3a2a1a2c1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a3a2a1a2c1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 18,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-BY63832",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y17221",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G1a1a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G1a1a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 96,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-DF27",
+ "source_parent_status": "matched→R1b1a1b1a1a2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y197002",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1a2a",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "C1a2a",
+ "hits": 3
+ },
+ {
+ "node": "C2b1a1a1a1a5a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 99,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-M12109",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT51793",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1b2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1b2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 101,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FT47952",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS6519",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2a7~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2a7~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 1,
+ "source_parent": "R1b-CTS9545",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS13",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C2b1b1e~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "C2b1b1e~",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b2a4a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 111,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-Z1898",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-chrY:6804113 T->C",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2c1a4b5a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2c1a4b5a2",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 99,
+ "snps_known_to_foundation": 1,
+ "source_parent": "R1b-Z31644",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC17099",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a1b1a1e2f2b~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "I1a1b1a1e2f2b~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 72,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-Z31644",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y7402",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1b1a2a1a1a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a1b1a2a1a1a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 15,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Y19522",
+ "source_parent_status": "matched→R1b1a1b1a1a2a4 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY19153",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 102,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Y19522",
+ "source_parent_status": "matched→R1b1a1b1a1a2a4 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y144477",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "L2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "L2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 134,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-P312",
+ "source_parent_status": "matched→R1b1a1b1a1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-A7970",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "O2a1b1a1a1a1b1a1b1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "O2a1b1a1a1a1b1a1b1a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 15,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-S206",
+ "source_parent_status": "matched→R1b1a1b1a1a2b2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY4040",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a3~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a3~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 81,
+ "snps_known_to_foundation": 1,
+ "source_parent": "R1b-A7970",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS188",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2b2a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2b2a~",
+ "hits": 8
+ }
+ ],
+ "defining_snp_count": 27,
+ "snps_known_to_foundation": 8,
+ "source_parent": "R1b-A7970",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y19233",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b2b3b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1b2b3b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-FTA27217",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FGC57678",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "H1a1a4a",
+ "anchor_strength": 0.375,
+ "candidates": [
+ {
+ "node": "H1a1a4a",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a2a~",
+ "hits": 3
+ },
+ {
+ "node": "I2a1b1a2a1a1a1a1",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 90,
+ "snps_known_to_foundation": 8,
+ "source_parent": "R1b-Y17177",
+ "source_parent_status": "matched→R1b1a1b1a1a2b1c2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-BY3554",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "G2a1a1a1a1a1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a1a1a1a1a1a1",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 78,
+ "snps_known_to_foundation": 2,
+ "source_parent": "R1b-YSC0000193",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y139280",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1a2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1a2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 90,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-L20",
+ "source_parent_status": "matched→R1b1a1b1a1a2b1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS9044",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2b1a2",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2b1a2",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2b1a3~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2b1a4~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 9,
+ "source_parent": "R1b-S255",
+ "source_parent_status": "matched→R1b1a1b1a1a2b1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT303311",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a2a1b1b1a1a1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "I2a2a1b1b1a1a1",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b2a1a2c2a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 75,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Z275",
+ "source_parent_status": "matched→R1b1a1b1a1a2b1a2a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-CTS36",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "B2b3~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "B2b3~",
+ "hits": 3
+ },
+ {
+ "node": "I1a2a1a1a1a2b1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 126,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-PF6660",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-DF99",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1b1a1b1a1a2f",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2f",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 2,
+ "source_parent": "R1b-P312",
+ "source_parent_status": "matched→R1b1a1b1a1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT345031",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "L1a2a1a2~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "L1a2a1a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 105,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-DF99",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-Y139461",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "Q1b",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "Q1b",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a4",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 129,
+ "snps_known_to_foundation": 6,
+ "source_parent": "R1b-P310",
+ "source_parent_status": "matched→R1b1a1b1a1 (62%)",
+ "is_backbone": false
+ },
+ {
+ "node": "R1b-FT407478",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2a1c~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2a1c~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 108,
+ "snps_known_to_foundation": 3,
+ "source_parent": "R1b-Y19469",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "I1a-Z2540",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I1a2b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I1a2b",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 15,
+ "snps_known_to_foundation": 1,
+ "source_parent": "I1a-Z58",
+ "source_parent_status": "matched→I1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I1a-FGC43913",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a2b1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I1a2b1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 3,
+ "source_parent": "I1a-S2293",
+ "source_parent_status": "matched→I1a2b3~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I1a-BY383",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I1a2a1a1a5a1a~",
+ "anchor_strength": 0.375,
+ "candidates": [
+ {
+ "node": "I1a2a1a1a5a1a~",
+ "hits": 9
+ },
+ {
+ "node": "I1a2a1a1a5a~",
+ "hits": 9
+ },
+ {
+ "node": "I1a2a1a1a5a1~",
+ "hits": 3
+ },
+ {
+ "node": "I1a2a1a1a5~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 55,
+ "snps_known_to_foundation": 24,
+ "source_parent": "I1a-FGC57872",
+ "source_parent_status": "matched→I1a2a1a1a5~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I1a-A1915",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a4a1b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2b2a4a1b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 113,
+ "snps_known_to_foundation": 3,
+ "source_parent": "I1a-A1944",
+ "source_parent_status": "matched→I1a2a1a1a1a1a~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I1a-FT318040",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1b2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1b2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 94,
+ "snps_known_to_foundation": 3,
+ "source_parent": "I1a-Y3866",
+ "source_parent_status": "matched→I1a1a~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I1a-FT85559",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I1a1a1b4a~",
+ "anchor_strength": 0.43859649122807015,
+ "candidates": [
+ {
+ "node": "I1a1a1b4a~",
+ "hits": 25
+ },
+ {
+ "node": "I1a1a1b4~",
+ "hits": 15
+ },
+ {
+ "node": "I1a1a1b~",
+ "hits": 6
+ },
+ {
+ "node": "B2b1a2a~",
+ "hits": 3
+ },
+ {
+ "node": "I1a1a1b4a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 142,
+ "snps_known_to_foundation": 57,
+ "source_parent": "I1a-Y3866",
+ "source_parent_status": "matched→I1a1a~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I1a-S9318",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I1a1b1g3a~",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "I1a1b1g3a~",
+ "hits": 3
+ },
+ {
+ "node": "I1a1b1g3~",
+ "hits": 3
+ },
+ {
+ "node": "I1a1b1g~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 34,
+ "snps_known_to_foundation": 9,
+ "source_parent": "I1a-FGC41265",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "I2a-FGC3633",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I2a1b1a2b1a2a1",
+ "anchor_strength": 0.23809523809523808,
+ "candidates": [
+ {
+ "node": "I2a1b1a2b1a2a1",
+ "hits": 5
+ },
+ {
+ "node": "I2a1b1a2b1a2a1a",
+ "hits": 5
+ },
+ {
+ "node": "I2a1b1a2b1a2a1a1a1a",
+ "hits": 5
+ },
+ {
+ "node": "I2a1b1a2b1a2a1a1a",
+ "hits": 3
+ },
+ {
+ "node": "I2a1b1a2b1a2a1a1a1",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 47,
+ "snps_known_to_foundation": 21,
+ "source_parent": "I2a-FGC3628",
+ "source_parent_status": "matched→I2a1b1a2b1a2a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I2a-FT2426",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I2a1b1a2b1a2a1a1a1a1",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "I2a1b1a2b1a2a1a1a1a1",
+ "hits": 3
+ },
+ {
+ "node": "I2a1b1a2b1a2a1a1a1a1a",
+ "hits": 3
+ },
+ {
+ "node": "Q1a1a1a1a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 16,
+ "snps_known_to_foundation": 6,
+ "source_parent": "I2a-FGC3633",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "I2a-FT255558",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1a1a1a1a1a1e5~",
+ "anchor_strength": 0.5384615384615384,
+ "candidates": [
+ {
+ "node": "I2a1a1a1a1a1a1e5~",
+ "hits": 28
+ },
+ {
+ "node": "I2a1a1a1a1a1a1f2~",
+ "hits": 15
+ },
+ {
+ "node": "I2a1a1a1a1a1a1e5e~",
+ "hits": 6
+ },
+ {
+ "node": "I2a1a1a1a1a1a1e~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 120,
+ "snps_known_to_foundation": 37,
+ "source_parent": "I2a-FGC93119",
+ "source_parent_status": "matched→I2a1a1a1a1a1a1f~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I2a-FT58949",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I2a1a1a1a1a1a2",
+ "anchor_strength": 0.34814814814814815,
+ "candidates": [
+ {
+ "node": "I2a1a1a1a1a1a2",
+ "hits": 47
+ },
+ {
+ "node": "I2a1a1a1a1a1a2a1a~",
+ "hits": 44
+ },
+ {
+ "node": "I2a1a1a1a1a1a2a~",
+ "hits": 37
+ },
+ {
+ "node": "I2a1a1a1a1a1a2a1~",
+ "hits": 3
+ },
+ {
+ "node": "Q1a2a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 260,
+ "snps_known_to_foundation": 132,
+ "source_parent": "I2a-Z105",
+ "source_parent_status": "matched→I2a1a1a1a1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "I2a-Y7635",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "I2a1a1b1a1",
+ "anchor_strength": 0.3557692307692308,
+ "candidates": [
+ {
+ "node": "I2a1a1b1a1",
+ "hits": 111
+ },
+ {
+ "node": "I2a1a1b1a",
+ "hits": 85
+ },
+ {
+ "node": "I2a1a1b1",
+ "hits": 67
+ },
+ {
+ "node": "I2a1a1b",
+ "hits": 22
+ },
+ {
+ "node": "D1a2a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 874,
+ "snps_known_to_foundation": 294,
+ "source_parent": "I2a-FGC56761",
+ "source_parent_status": "matched→I2a1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2b-Z2523",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2b2a1a1a1a1a",
+ "anchor_strength": 0.46153846153846156,
+ "candidates": [
+ {
+ "node": "J2b2a1a1a1a1a",
+ "hits": 6
+ },
+ {
+ "node": "J2b2a1a1a1a~",
+ "hits": 4
+ },
+ {
+ "node": "J2b2a1a1a1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 26,
+ "snps_known_to_foundation": 13,
+ "source_parent": "J2b-Z8418",
+ "source_parent_status": "matched→J2b2a1a1a~ (75%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2b-Z631",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J2b2a1a1a1a1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J2b2a1a1a1a1a1a",
+ "hits": 18
+ }
+ ],
+ "defining_snp_count": 46,
+ "snps_known_to_foundation": 18,
+ "source_parent": "J2b-Z1295",
+ "source_parent_status": "matched→J2b2a1a1a1a1a1b~ (50%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-FT71373",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "O2a2b2a1b2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "O2a2b2a1b2",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 141,
+ "snps_known_to_foundation": 2,
+ "source_parent": "J2a-PF5084",
+ "source_parent_status": "matched→J2a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-SK1314",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1a1b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1b",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 276,
+ "snps_known_to_foundation": 2,
+ "source_parent": "J2a-FT71373",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-Z37967",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a2",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I2a2",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 162,
+ "snps_known_to_foundation": 3,
+ "source_parent": "J2a-Z35827",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-FGC75665",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2a1a1a2b2a1a1c2b1~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "J2a1a1a2b2a1a1c2b1~",
+ "hits": 1
+ },
+ {
+ "node": "J2a1a1a2b2a1a1c2b~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 95,
+ "snps_known_to_foundation": 2,
+ "source_parent": "J2a-FGC75666",
+ "source_parent_status": "matched→J2a1a1a2b2a1a1c2~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-Y3019",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2a1a1a2b2a3b1b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J2a1a1a2b2a3b1b~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 14,
+ "snps_known_to_foundation": 1,
+ "source_parent": "J2a-Z6251",
+ "source_parent_status": "matched→J2a1a1a2b2a3b1~ (75%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-FT280289",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2a1a1a2b2a2b3a2a1a~",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "J2a1a1a2b2a2b3a2a1a~",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1a2b2a2b3a2a1~",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1a2b2a2b3a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 200,
+ "snps_known_to_foundation": 9,
+ "source_parent": "J2a-L742",
+ "source_parent_status": "matched→J2a1a1a2b2a2b3 (71%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-FGC15781",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2a1a1a2a1a~",
+ "anchor_strength": 0.3,
+ "candidates": [
+ {
+ "node": "J2a1a1a2a1a~",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1a2a1~",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a1a1a2",
+ "hits": 2
+ },
+ {
+ "node": "H3a1",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 149,
+ "snps_known_to_foundation": 10,
+ "source_parent": "J2a-Z6065",
+ "source_parent_status": "matched→J2a1a1a2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-Z27921",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1b1a1",
+ "anchor_strength": 0.17647058823529413,
+ "candidates": [
+ {
+ "node": "E1b1b1b1a1",
+ "hits": 3
+ },
+ {
+ "node": "I",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1a2a1a1a~",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1a2a1a1~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b5a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 490,
+ "snps_known_to_foundation": 17,
+ "source_parent": "J2a-FGC15781",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-FT3329",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2a1a1a2a2b",
+ "anchor_strength": 0.2916666666666667,
+ "candidates": [
+ {
+ "node": "J2a1a1a2a2b",
+ "hits": 7
+ },
+ {
+ "node": "J2a1a1a2a2",
+ "hits": 6
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a2~",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1a2a2b2a~",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1a2a2b2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 532,
+ "snps_known_to_foundation": 24,
+ "source_parent": "J2a-Z6065",
+ "source_parent_status": "matched→J2a1a1a2a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-Z7372",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "R1a1a1b1a1a1b~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a1a1b~",
+ "hits": 2
+ },
+ {
+ "node": "R1b1a1b1b1",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 364,
+ "snps_known_to_foundation": 4,
+ "source_parent": "J2a-PF5197",
+ "source_parent_status": "matched→J2a1a1b1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-Z6092",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 278,
+ "snps_known_to_foundation": 3,
+ "source_parent": "J2a-Z7294",
+ "source_parent_status": "matched→J2a1a1b1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-FT62577",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J2b2a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J2b2a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 97,
+ "snps_known_to_foundation": 3,
+ "source_parent": "J2a-Z7294",
+ "source_parent_status": "matched→J2a1a1b1a1a (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-Z7687",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2a1a1b2a1",
+ "anchor_strength": 0.49122807017543857,
+ "candidates": [
+ {
+ "node": "J2a1a1b2a1",
+ "hits": 28
+ },
+ {
+ "node": "J2a1a1b2a",
+ "hits": 27
+ },
+ {
+ "node": "R1a1a1b1a2b3a3a2h1a~",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 225,
+ "snps_known_to_foundation": 57,
+ "source_parent": "J2a-Z7680",
+ "source_parent_status": "matched→J2a1a1b2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-A25182",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1a2b",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "C1a2b",
+ "hits": 3
+ },
+ {
+ "node": "J2a1a1b2a1b1b3a2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 132,
+ "snps_known_to_foundation": 6,
+ "source_parent": "J2a-Y24651",
+ "source_parent_status": "matched→J2a1a1b2a1b1b3a2~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-MF89074",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "L1",
+ "anchor_strength": 0.6,
+ "candidates": [
+ {
+ "node": "L1",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a4b2a1a1a",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 216,
+ "snps_known_to_foundation": 5,
+ "source_parent": "J2a-FGC70845",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-FT316587",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1a1a1a2a1b2~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a2a1b2~",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1b1a1a3",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 223,
+ "snps_known_to_foundation": 6,
+ "source_parent": "J2a-FT178882",
+ "source_parent_status": "matched→J2a1a1b2a1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J2a-PF4993",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J2a2a",
+ "anchor_strength": 0.4634146341463415,
+ "candidates": [
+ {
+ "node": "J2a2a",
+ "hits": 19
+ },
+ {
+ "node": "E1a2a1b1",
+ "hits": 3
+ },
+ {
+ "node": "H1a1a4a",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e2a1a1~",
+ "hits": 3
+ },
+ {
+ "node": "J2a2a1a1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 824,
+ "snps_known_to_foundation": 41,
+ "source_parent": "J2a-PF5030",
+ "source_parent_status": "matched→J2a2a (88%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-CTS130",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4c4a2a1~",
+ "anchor_strength": 0.4,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4c4a2a1~",
+ "hits": 6
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4c4a2a~",
+ "hits": 6
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4c4a2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 167,
+ "snps_known_to_foundation": 15,
+ "source_parent": "J1a-Z2291",
+ "source_parent_status": "matched→J1a2a1a2d2b2b2c4c4~ (80%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-FGC12808",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4e~",
+ "anchor_strength": 0.5625,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4e~",
+ "hits": 18
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4a1~",
+ "hits": 12
+ },
+ {
+ "node": "G2a2a1a2a1",
+ "hits": 1
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4e1~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 157,
+ "snps_known_to_foundation": 32,
+ "source_parent": "J1a-FGC12806",
+ "source_parent_status": "matched→J1a2a1a2d2b2b2c4a~ (75%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-Y5585",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2~",
+ "anchor_strength": 0.3684210526315789,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2~",
+ "hits": 7
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a~",
+ "hits": 6
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 30,
+ "snps_known_to_foundation": 19,
+ "source_parent": "J1a-BY6660",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-Y67920",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5a1b~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1b~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 1,
+ "source_parent": "J1a-FGC4257",
+ "source_parent_status": "matched→J1a2a1a2d2b2b2c4d2a2a5a~ (81%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-FGC3",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5",
+ "anchor_strength": 0.16666666666666666,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a5a~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a5b1~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a5~",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 15,
+ "snps_known_to_foundation": 15,
+ "source_parent": "J1a-Y9271",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-FT281164",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "C1b1a1a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "C1b1a1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 17,
+ "snps_known_to_foundation": 3,
+ "source_parent": "J1a-FGC60122",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-FTA27241",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4b2a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4b2a~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 194,
+ "snps_known_to_foundation": 1,
+ "source_parent": "J1a-FGC15941",
+ "source_parent_status": "matched→J1a2a1a2d2b2b2c4b2~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-FGC35109",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4b1c1~",
+ "anchor_strength": 0.47058823529411764,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4b1c1~",
+ "hits": 16
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4b1c3~",
+ "hits": 9
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4b1c3a1~",
+ "hits": 6
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4b1c2~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 117,
+ "snps_known_to_foundation": 28,
+ "source_parent": "J1a-Y3441",
+ "source_parent_status": "matched→J1a2a1a2d2b2b2c4b1c~ (60%)",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-FGC86304",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J1a2a1a2d2b2b2c4b1c3a1a~",
+ "anchor_strength": 0.5714285714285714,
+ "candidates": [
+ {
+ "node": "J1a2a1a2d2b2b2c4b1c3a1a~",
+ "hits": 12
+ },
+ {
+ "node": "J1a2a1a2d2b2b2c4b1c3a1a1~",
+ "hits": 9
+ }
+ ],
+ "defining_snp_count": 43,
+ "snps_known_to_foundation": 21,
+ "source_parent": "J1a-FGC35109",
+ "source_parent_status": "flag_weak",
+ "is_backbone": false
+ },
+ {
+ "node": "J1a-Z27661",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1b1a1b1a1a2b1d1a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1b1a1b1a1a2b1d1a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 263,
+ "snps_known_to_foundation": 3,
+ "source_parent": "J1a-Z2312",
+ "source_parent_status": "matched→J1a2a1a2d2b2b2~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "D-PH4",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "D1a1a1",
+ "anchor_strength": 0.4435028248587571,
+ "candidates": [
+ {
+ "node": "D1a1a1",
+ "hits": 157
+ },
+ {
+ "node": "D1a1a1a1",
+ "hits": 140
+ },
+ {
+ "node": "D1a1a1a1a",
+ "hits": 9
+ },
+ {
+ "node": "D1a1a1a1a1b",
+ "hits": 9
+ },
+ {
+ "node": "D1a1",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 1381,
+ "snps_known_to_foundation": 351,
+ "source_parent": "D",
+ "source_parent_status": "matched→D1 (90%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E2-FT322364",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E2b1a1d",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E2b1a1d",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 12,
+ "snps_known_to_foundation": 1,
+ "source_parent": "E2-V1001",
+ "source_parent_status": "matched→E2b1a1 (98%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1a-ACT19",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1a2c~",
+ "anchor_strength": 0.75,
+ "candidates": [
+ {
+ "node": "E1a2c~",
+ "hits": 9
+ },
+ {
+ "node": "N1a1a1a1a2a1a1a1a1a1a1c~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 350,
+ "snps_known_to_foundation": 12,
+ "source_parent": "E1a-Z15084",
+ "source_parent_status": "matched→E1a2b (97%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1a-PF7332",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "J2b1b2a~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "J2b1b2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 135,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1a-Z15172",
+ "source_parent_status": "matched→E1a2a2 (97%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y161059",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I2a1b1a1b1a1a2b",
+ "anchor_strength": 0.6,
+ "candidates": [
+ {
+ "node": "I2a1b1a1b1a1a2b",
+ "hits": 3
+ },
+ {
+ "node": "R1a1a1b1a2a2a1b~",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 261,
+ "snps_known_to_foundation": 5,
+ "source_parent": "E1b-V264",
+ "source_parent_status": "matched→E1b1b1a1a2 (67%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-V1174",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1a1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 37,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1b-Y31640",
+ "source_parent_status": "matched→E1b1b1a1a2a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-S1954",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1a1a2",
+ "anchor_strength": 0.3333333333333333,
+ "candidates": [
+ {
+ "node": "E1b1b1a1a2",
+ "hits": 3
+ },
+ {
+ "node": "I1a2a1a1a1a",
+ "hits": 3
+ },
+ {
+ "node": "J1a2a1a2c1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 75,
+ "snps_known_to_foundation": 9,
+ "source_parent": "E1b-V1174",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-MF736421",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1a1a1c1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1a1a1c1",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 1,
+ "source_parent": "E1b-FGC2177",
+ "source_parent_status": "matched→E1b1b1a1a1c1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y2846",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1a1a1c1b1a1a~",
+ "anchor_strength": 0.5714285714285714,
+ "candidates": [
+ {
+ "node": "E1b1b1a1a1c1b1a1a~",
+ "hits": 12
+ },
+ {
+ "node": "E1b1b1a1a1c1b1a~",
+ "hits": 6
+ },
+ {
+ "node": "E1b1b1a1a1c1b1a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 55,
+ "snps_known_to_foundation": 21,
+ "source_parent": "E1b-Y2881",
+ "source_parent_status": "matched→E1b1b1a1a1c1b1c~ (58%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-V4490",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1a1a1c3~",
+ "anchor_strength": 0.5384615384615384,
+ "candidates": [
+ {
+ "node": "E1b1b1a1a1c3~",
+ "hits": 7
+ },
+ {
+ "node": "E1b1a1b",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a5d3",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 175,
+ "snps_known_to_foundation": 13,
+ "source_parent": "E1b-FGC2177",
+ "source_parent_status": "matched→E1b1b1a1a1c1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-V4258",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1a1a1b1",
+ "anchor_strength": 0.45901639344262296,
+ "candidates": [
+ {
+ "node": "E1b1b1a1a1b1",
+ "hits": 28
+ },
+ {
+ "node": "E1b1b1a1a1b1a3",
+ "hits": 20
+ },
+ {
+ "node": "E1b1b1a1a1b1a~",
+ "hits": 10
+ },
+ {
+ "node": "L1a1b3a2b~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 187,
+ "snps_known_to_foundation": 61,
+ "source_parent": "E1b-Z21175",
+ "source_parent_status": "matched→E1b1b1a1a1b1a3 (67%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-PF2234",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1a1b1",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1a1b1",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 1,
+ "source_parent": "E1b-Y4359",
+ "source_parent_status": "matched→E1b1b1a1b1 (94%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-FT318574",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1c4",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1c4",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 289,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1b-FT38444",
+ "source_parent_status": "matched→E1b1b1a1b2a4b1~ (50%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-FT208247",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1b1a1h1~",
+ "anchor_strength": 0.42857142857142855,
+ "candidates": [
+ {
+ "node": "E1b1b1b1a1h1~",
+ "hits": 6
+ },
+ {
+ "node": "E1b1b1b1a1h~",
+ "hits": 6
+ },
+ {
+ "node": "E1b1a1a1a1c1a1a3a1d1b",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 110,
+ "snps_known_to_foundation": 14,
+ "source_parent": "E1b-A930",
+ "source_parent_status": "matched→E1b1b1b1a1h~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-A2227",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1b1a1f2b~",
+ "anchor_strength": 0.35294117647058826,
+ "candidates": [
+ {
+ "node": "E1b1b1b1a1f2b~",
+ "hits": 6
+ },
+ {
+ "node": "E1b1b1b1a1f~",
+ "hits": 4
+ },
+ {
+ "node": "E1b1b1b1a1f2a~",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1b1a2~",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1b1a1f2~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 106,
+ "snps_known_to_foundation": 17,
+ "source_parent": "E1b-M5198",
+ "source_parent_status": "matched→E1b1b1b1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y141591",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b1a2~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1b1b1a2~",
+ "hits": 6
+ },
+ {
+ "node": "B2b1a2a~",
+ "hits": 3
+ },
+ {
+ "node": "G2a2b2a1a1a1b1a2a1",
+ "hits": 2
+ },
+ {
+ "node": "C1b1a2a",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 118,
+ "snps_known_to_foundation": 12,
+ "source_parent": "E1b-Z21096",
+ "source_parent_status": "matched→E1b1b1b1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y4975",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b2a1a6d~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1b1b2a1a6d~",
+ "hits": 9
+ }
+ ],
+ "defining_snp_count": 9,
+ "snps_known_to_foundation": 9,
+ "source_parent": "E1b-Z838",
+ "source_parent_status": "matched→E1b1b1b2a1a5~ (52%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Z20936",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b2a1a6~",
+ "anchor_strength": 0.6575342465753424,
+ "candidates": [
+ {
+ "node": "E1b1b1b2a1a6~",
+ "hits": 48
+ },
+ {
+ "node": "E1b1b1b2a1a~",
+ "hits": 9
+ },
+ {
+ "node": "E1b1b1b2a1a6c1~",
+ "hits": 7
+ },
+ {
+ "node": "E1b1b1b2a1a6c~",
+ "hits": 3
+ },
+ {
+ "node": "I1a10b2a~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 211,
+ "snps_known_to_foundation": 70,
+ "source_parent": "E1b-Z838",
+ "source_parent_status": "matched→E1b1b1b2a1a5~ (52%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y17226",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1b2a1a1a1a1b2b1",
+ "anchor_strength": 0.4,
+ "candidates": [
+ {
+ "node": "E1b1b1b2a1a1a1a1b2b1",
+ "hits": 6
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1b2b",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1b2b1a",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1b2b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 17,
+ "snps_known_to_foundation": 15,
+ "source_parent": "E1b-Y15423",
+ "source_parent_status": "matched→E1b1b1b2a1a1a1a1b2~ (98%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-FGC7911",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "I1a1b1a4a2e1a1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "I1a1b1a4a2e1a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 255,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1b-FGC18372",
+ "source_parent_status": "matched→E1b1b1b2a1a1a1a1a1a~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Z20966",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1b1b2a1a1a1a1f1~",
+ "anchor_strength": 0.6666666666666666,
+ "candidates": [
+ {
+ "node": "E1b1b1b2a1a1a1a1f1~",
+ "hits": 6
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1f~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 24,
+ "snps_known_to_foundation": 9,
+ "source_parent": "E1b-Z20968",
+ "source_parent_status": "matched→E1b1b1b2a1a1a1a1e~ (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Z20984",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1b2a1a1a1a1f1b1a",
+ "anchor_strength": 0.42105263157894735,
+ "candidates": [
+ {
+ "node": "E1b1b1b2a1a1a1a1f1b1a",
+ "hits": 24
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1f1b",
+ "hits": 15
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1f1b1",
+ "hits": 9
+ },
+ {
+ "node": "D1",
+ "hits": 3
+ },
+ {
+ "node": "E1b1b1b2a1a1a1a1f1b1a1",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 85,
+ "snps_known_to_foundation": 51,
+ "source_parent": "E1b-Z20966",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Z20900",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1b2b2a1a1~",
+ "anchor_strength": 0.4444444444444444,
+ "candidates": [
+ {
+ "node": "E1b1b1b2b2a1a1~",
+ "hits": 24
+ },
+ {
+ "node": "E1b1b1b2b2a1a~",
+ "hits": 18
+ },
+ {
+ "node": "E1b1b1b2b2a1a1a",
+ "hits": 9
+ },
+ {
+ "node": "E1b1b1b2b",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 151,
+ "snps_known_to_foundation": 54,
+ "source_parent": "E1b-L364",
+ "source_parent_status": "matched→E1b1b1b2b2a1a~ (71%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-MF121627",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E",
+ "anchor_strength": 0.75,
+ "candidates": [
+ {
+ "node": "E",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a2c1a6b~",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 144,
+ "snps_known_to_foundation": 4,
+ "source_parent": "E1b-CTS275",
+ "source_parent_status": "matched→E1b1a1a1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y81422",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 156,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1b-Z36529",
+ "source_parent_status": "matched→E1b1a1a1a1c4~ (75%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Z22359",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3e",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3e",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 2,
+ "snps_known_to_foundation": 2,
+ "source_parent": "E1b-Z5962",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Z1656",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3a1d1b1",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3a1d1b1",
+ "hits": 6
+ },
+ {
+ "node": "E1b1a1a1a1c1a1a3a1d1b1b1",
+ "hits": 6
+ }
+ ],
+ "defining_snp_count": 17,
+ "snps_known_to_foundation": 6,
+ "source_parent": "E1b-Z22617",
+ "source_parent_status": "matched→E1b1a1a1a1c1a1a3a1d1b1b (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-FT206082",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "R1a1a1b1a3a3a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "R1a1a1b1a3a3a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 88,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1b-FT52771",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-FT325004",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3a1d1b1a1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3a1d1b1a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 19,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1b-FT206082",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y196451",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3a1c",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3a1c",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 1,
+ "source_parent": "E1b-Z1704",
+ "source_parent_status": "matched→E1b1a1a1a1c1a1a3a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-PF7223",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1a1a1a1c1a1a3c1b",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a1c1a1a3c1b",
+ "hits": 1
+ }
+ ],
+ "defining_snp_count": 3,
+ "snps_known_to_foundation": 1,
+ "source_parent": "E1b-Z1705",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-V4311",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1b1a1b1a",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "E1b1b1a1b1a",
+ "hits": 2
+ },
+ {
+ "node": "H1a2b1a",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 177,
+ "snps_known_to_foundation": 4,
+ "source_parent": "E1b-CTS275",
+ "source_parent_status": "matched→E1b1a1a1a1 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-FT399996",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b1a1a1a1a1~",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "G2a2b1a1a1a1a1~",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 42,
+ "snps_known_to_foundation": 3,
+ "source_parent": "E1b-FT399583",
+ "source_parent_status": "novel",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Z5953",
+ "category": "weak_plurality",
+ "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).",
+ "best_anchor": "E1b1a1a1a2a1a3b1a2a",
+ "anchor_strength": 1.0,
+ "candidates": [
+ {
+ "node": "E1b1a1a1a2a1a3b1a2a",
+ "hits": 2
+ }
+ ],
+ "defining_snp_count": 6,
+ "snps_known_to_foundation": 2,
+ "source_parent": "E1b-Z1725",
+ "source_parent_status": "matched→E1b1a1a1a2a1a3b1a2 (100%)",
+ "is_backbone": false
+ },
+ {
+ "node": "E1b-Y55741",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "G2a2b2a1a1b1a1a2b1b3~",
+ "anchor_strength": 0.5,
+ "candidates": [
+ {
+ "node": "G2a2b2a1a1b1a1a2b1b3~",
+ "hits": 3
+ },
+ {
+ "node": "R1b1a1b1a1a1c2b2a1b1a",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 219,
+ "snps_known_to_foundation": 6,
+ "source_parent": "E1b-V4257",
+ "source_parent_status": "matched→E1b1a1a1b~ (57%)",
+ "is_backbone": false
+ },
+ {
+ "node": "A00",
+ "category": "parent_inconsistent",
+ "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).",
+ "best_anchor": "A00",
+ "anchor_strength": 0.9720149253731343,
+ "candidates": [
+ {
+ "node": "A00",
+ "hits": 3126
+ },
+ {
+ "node": "C1a1",
+ "hits": 6
+ },
+ {
+ "node": "A00b",
+ "hits": 3
+ },
+ {
+ "node": "B3",
+ "hits": 3
+ },
+ {
+ "node": "C",
+ "hits": 3
+ }
+ ],
+ "defining_snp_count": 6548,
+ "snps_known_to_foundation": 3210,
+ "source_parent": "Y",
+ "source_parent_status": "flag_weak",
+ "is_backbone": true
+ },
+ {
+ "node": "BT",
+ "category": "name_collision",
+ "reason": "Source node name matches an existing foundation node but defines different SNPs (no SNP overlap) — reconcile or rename.",
+ "best_anchor": "BT",
+ "anchor_strength": 0.0,
+ "candidates": [],
+ "defining_snp_count": 1117,
+ "snps_known_to_foundation": 0,
+ "source_parent": "A1b",
+ "source_parent_status": "flag_parent_inconsistent",
+ "is_backbone": true
+ },
+ {
+ "node": "I1b",
+ "category": "name_collision",
+ "reason": "Source node name matches an existing foundation node but defines different SNPs (no SNP overlap) — reconcile or rename.",
+ "best_anchor": "I1b",
+ "anchor_strength": 0.0,
+ "candidates": [],
+ "defining_snp_count": 4,
+ "snps_known_to_foundation": 0,
+ "source_parent": "I1",
+ "source_parent_status": "matched→I1 (97%)",
+ "is_backbone": false
+ }
+ ],
+ "graft_blocked": [
+ "A0-V151",
+ "A1a-V4436",
+ "A1a-Z11346",
+ "A1a-Z11371",
+ "A1a-S15194",
+ "A1a-Z38529",
+ "A1b1-V1018",
+ "A1b1-V4244",
+ "A1b1-Y23900",
+ "A1b1-Y23869",
+ "A1b1-Y156153",
+ "A1b1-V7285",
+ "A1b1-V5912",
+ "A1b1-Y30506",
+ "A1b1-V5321",
+ "A1b1-BY16278",
+ "A1b1-FGC38354",
+ "A1b1-V1879",
+ "A1b1-Y161035",
+ "A1b1-V1559",
+ "A1b1-M9705",
+ "A1b1-Y76912",
+ "A1b1-Y161020",
+ "B-M6503",
+ "B-FT334006",
+ "C-F15201",
+ "G-Y197456",
+ "G-chrY:25320800 G->A",
+ "O-FGC86",
+ "O-FT272461",
+ "O-FTB26900",
+ "O-CP086569.2:12299902 C->G",
+ "O-BY153273",
+ "N-CTS277",
+ "N-FT324649",
+ "S-F17185",
+ "P2",
+ "R2-FT327021",
+ "R2-FGC56232",
+ "R2-FGC61992",
+ "R2-FGC61453",
+ "R1a-Y183609",
+ "R1a-S7737",
+ "R1b-FT300167",
+ "R1b-Y21408",
+ "R1b-S9535",
+ "R1b-Y8604",
+ "R1b-BY68252",
+ "R1b-chrY:5321421 C->T",
+ "R1b-BY25301",
+ "R1b-BY25309",
+ "R1b-chrY:25774450 A->G",
+ "R1b-A7209",
+ "R1b-Y94610",
+ "R1b-FGC34162",
+ "R1b-Y61666",
+ "R1b-FGC52315",
+ "R1b-S8350",
+ "R1b-FGC15226",
+ "R1b-BY675",
+ "R1b-A24483",
+ "R1b-FGC62105",
+ "R1b-A11391",
+ "R1b-ZS10825",
+ "R1b-FGC13773",
+ "R1b-FT2802",
+ "R1b-A9904",
+ "R1b-FGC29291",
+ "R1b-BY42407",
+ "R1b-A27511",
+ "R1b-Z17687",
+ "R1b-Y83959",
+ "R1b-A10891",
+ "R1b-BY11465",
+ "R1b-CTS606",
+ "R1b-Z29713",
+ "R1b-FT299988",
+ "R1b-PH2007",
+ "R1b-CTS8001",
+ "R1b-Y22894",
+ "R1b-Y22889",
+ "R1b-Y30858",
+ "R1b-BY208342",
+ "R1b-BY202532",
+ "R1b-CTS416",
+ "R1b-FTA51551",
+ "R1b-Y20968",
+ "R1b-FT173909",
+ "R1b-FT300231",
+ "R1b-FGC60524",
+ "R1b-Y15850",
+ "R1b-Y176774",
+ "R1b-BY63479",
+ "R1b-BY59223",
+ "R1b-Y228967",
+ "R1b-Y139200",
+ "R1b-S16136",
+ "I2a-FT73935",
+ "J2b-CP086569.2:25487852 C->T",
+ "J2a-Y60112",
+ "J2a-Z35822",
+ "J2a-Z35827",
+ "J2a-FT3472",
+ "J2a-FT294597",
+ "J2a-Z7433",
+ "J2a-Z7391",
+ "J2a-FT3373",
+ "J2a-FT171820",
+ "J2a-FT171833",
+ "J2a-Z28070",
+ "J2a-FT317295",
+ "J2a-FT324728",
+ "J2a-Z7274",
+ "J2a-Z7261",
+ "J2a-CP086569.2:12398698 T->G",
+ "J1a-FT316616",
+ "J1a-FT283260",
+ "J1a-ZS5596",
+ "J1a-FT299872",
+ "J1a-FT299874",
+ "J1a-BY89355",
+ "J1a-FT14822",
+ "E1b-Z6370",
+ "E1b-A18841",
+ "E1b-Z5018",
+ "E1b-Y145455",
+ "E1b-FGC3479",
+ "E1b-K695",
+ "E1b-Y161200",
+ "E1b-FT400626"
+ ]
+}
\ No newline at end of file
diff --git a/documents/planning/appview-pds-backfeed-system.md b/documents/planning/appview-pds-backfeed-system.md
deleted file mode 100644
index e544feb8..00000000
--- a/documents/planning/appview-pds-backfeed-system.md
+++ /dev/null
@@ -1,1346 +0,0 @@
-# AppView-to-PDS Backfeed System
-
-## Executive Summary
-
-This document describes the **backfeed methodology** for keeping researcher/citizen PDS records synchronized with computed and curated data from the DecodingUs AppView. When DecodingUs refines haplogroup assignments, discovers new branches, identifies potential matches, or updates any derived data, these changes must flow back to the user's PDS so they always have the most current metadata.
-
----
-
-## Problem Statement
-
-The current Atmosphere architecture is primarily **unidirectional**:
-
-```
-Researcher/Citizen PDS → Firehose → DecodingUs AppView → Database
-```
-
-However, DecodingUs performs significant post-ingestion processing on **metadata only**:
-
-1. **Haplogroup Refinement**: Tree updates may refine `R-L21` to `R-L21>FT54321`
-2. **Branch Discovery**: Private variants may be promoted to official branches
-3. **Ancestral STR Reconstruction**: Compute modal STR haplotypes for tree branches using submitted STR profiles
-4. **TMRCA Estimation**: Age estimates computed from STR variance across the network
-5. **Potential Match Discovery**: Identify potential genetic matches across the network for user exploration
-6. **Confirmed Match Stamping**: Record when both parties agree on a match result
-7. **Lab Inference**: Sequencer instrument-to-lab mappings from metadata
-
-### Edge Computing Model
-
-**Critical Architecture Principle**: Raw genomic data (BAM/CRAM/VCF/genotype files) **never** flows to DecodingUs. All raw data analysis happens locally in the Navigator Workbench:
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ EDGE COMPUTING ARCHITECTURE │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ Navigator Workbench (Edge) │ │
-│ │ │ │
-│ │ Raw Data Analysis (LOCAL ONLY - never transmitted): │ │
-│ │ • BAM/CRAM alignment and coverage metrics │ │
-│ │ • Variant calling from sequence data │ │
-│ │ • Haplogroup determination (Y-DNA, mtDNA) │ │
-│ │ • STR extraction from WGS │ │
-│ │ • Ancestry composition / admixture analysis │ │
-│ │ • IBD segment detection (autosomal) │ │
-│ │ │ │
-│ │ Output → Summary metadata synced to PDS │ │
-│ │ │ │
-│ └──────────────────────────────┬───────────────────────────────────┘ │
-│ │ │
-│ ▼ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ User's PDS (Metadata Only) │ │
-│ │ │ │
-│ │ • biosample (haplogroup assignments, coverage stats) │ │
-│ │ • strProfile (STR marker values - needed for tree building) │ │
-│ │ • alignment (metrics summary, not raw alignments) │ │
-│ │ • populationBreakdown (admixture percentages) │ │
-│ │ • Private Y-DNA/mtDNA SNPs (for branch discovery) │ │
-│ │ │ │
-│ └──────────────────────────────┬───────────────────────────────────┘ │
-│ │ │
-│ ▼ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ DecodingUs AppView │ │
-│ │ │ │
-│ │ Aggregation & Network Intelligence: │ │
-│ │ • Haplogroup tree refinement (from network-wide SNP data) │ │
-│ │ • Ancestral STR reconstruction (from submitted STR profiles) │ │
-│ │ • TMRCA estimation (from STR variance across samples) │ │
-│ │ • Potential match identification (metadata comparison) │ │
-│ │ • Branch discovery consensus (aggregate private variants) │ │
-│ │ │ │
-│ │ NEVER receives: BAM, CRAM, VCF, FASTQ, raw genotype files │ │
-│ │ │ │
-│ └─────────────────────────────────────────────────────────────────┘ │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### Data That DOES Flow to DecodingUs (via PDS)
-
-| Data Type | Purpose | Why Needed |
-|:---|:---|:---|
-| Haplogroup assignments | Tree placement | Network-wide refinement |
-| Private Y-DNA SNPs | Branch discovery | Consensus detection for new branches |
-| Private mtDNA SNPs | Branch discovery | Consensus detection for new branches |
-| STR marker values | Ancestral reconstruction | Modal haplotype & TMRCA calculation |
-| Coverage/quality metrics | Sample characterization | Match quality assessment |
-| Ancestry percentages | Population context | Computed locally, shared as summary |
-
-### Data That NEVER Flows to DecodingUs
-
-| Data Type | Why Excluded |
-|:---|:---|
-| BAM/CRAM files | Raw sequence data - analyzed locally |
-| VCF files | Full variant calls - only private SNPs shared |
-| FASTQ files | Raw reads - never leave the workbench |
-| Genotype chip data | Raw calls - ancestry computed locally |
-| IBD segments | Sensitive relationship data - only match confirmation shared |
-
-Without backfeed, user PDS records become stale and diverge from the AppView's refined understanding.
-
-### Current Gap
-
-The Atmosphere Lexicon defines records that the AppView **writes to user PDS** (e.g., `matchList`, `haplogroupAncestralStr`), but lacks:
-
-1. A systematic enumeration of all backfeed scenarios
-2. New record types for AppView-computed updates
-3. Authorization model for AppView writing to user PDS
-4. Conflict resolution when local and remote changes collide
-5. Notification mechanism for users to see what changed
-6. Audit trail for all AppView-initiated updates
-
----
-
-## Backfeed Categories
-
-### Category 1: AppView-Authored Records
-
-Records created entirely by the AppView and pushed to user PDS. User cannot create these directly.
-
-| Record Type | Trigger | Content |
-|:---|:---|:---|
-| `potentialMatchList` | Network analysis identifies candidates | List of potential matches for user to explore |
-| `confirmedMatch` | Both parties agree on match result | Stamped match record with agreed details |
-| `haplogroupAncestralStr` | STR reconstruction runs | Ancestral modal haplotype for haplogroup branch |
-
-**Note**: `populationBreakdown` is computed locally in the Workbench and synced to PDS by the user, NOT authored by AppView.
-
-### Category 2: AppView-Updated Records
-
-Records created by the user (via Workbench) but updated by the AppView when network intelligence provides new information.
-
-| Record Type | Field(s) Updated | Trigger |
-|:---|:---|:---|
-| `biosample` | `haplogroups.yDna.haplogroupName` | Tree update refines terminal haplogroup |
-| `biosample` | `haplogroups.mtDna.haplogroupName` | Tree update refines terminal haplogroup |
-| `biosample` | `haplogroups.*.privateVariants` | Private variants reclassified as known branch |
-| `biosample` | `haplogroups.*.lineagePath` | Tree restructuring changes ancestry path |
-
-**Note**: `alignment.metrics`, `strProfile`, and `populationBreakdown` are computed locally and NOT updated by AppView.
-
-### Category 3: AppView-Notification Records
-
-New record types to notify users of changes without modifying their source records.
-
-| Record Type | Purpose |
-|:---|:---|
-| `haplogroupUpdate` | Notify of haplogroup refinement from tree update |
-| `branchDiscovery` | Notify that user's private variants became official branch |
-| `treeVersionUpdate` | Notify that reference tree version changed (may affect assignments) |
-
----
-
-## New Lexicon Records for Backfeed
-
-### 1. Haplogroup Update Notification (`com.decodingus.atmosphere.haplogroupUpdate`)
-
-Sent to user's PDS when their biosample's haplogroup assignment changes.
-
-**NSID:** `com.decodingus.atmosphere.haplogroupUpdate`
-
-**Author:** AppView (DecodingUs)
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.haplogroupUpdate",
- "defs": {
- "main": {
- "type": "record",
- "description": "Notification that a biosample's haplogroup assignment has been refined or corrected.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["meta", "atUri", "biosampleRef", "updateType", "lineage", "previous", "current"],
- "properties": {
- "atUri": {
- "type": "string",
- "description": "The AT URI of this update notification."
- },
- "meta": {
- "type": "ref",
- "ref": "com.decodingus.atmosphere.defs#recordMeta"
- },
- "biosampleRef": {
- "type": "string",
- "description": "AT URI of the biosample that was updated."
- },
- "updateType": {
- "type": "string",
- "description": "Type of haplogroup update.",
- "knownValues": ["REFINEMENT", "CORRECTION", "BRANCH_DISCOVERY", "TREE_UPDATE", "RECLASSIFICATION"]
- },
- "lineage": {
- "type": "string",
- "description": "Which lineage was updated.",
- "knownValues": ["Y_DNA", "MT_DNA"]
- },
- "previous": {
- "type": "ref",
- "ref": "#haplogroupState",
- "description": "The previous haplogroup assignment."
- },
- "current": {
- "type": "ref",
- "ref": "#haplogroupState",
- "description": "The new haplogroup assignment."
- },
- "reason": {
- "type": "string",
- "description": "Human-readable explanation of why the change occurred."
- },
- "treeVersion": {
- "type": "string",
- "description": "Haplogroup tree version that triggered the update (e.g., 'ISOGG-2025.01')."
- },
- "effectiveAt": {
- "type": "string",
- "format": "datetime",
- "description": "When this update took effect."
- },
- "acknowledgement": {
- "type": "ref",
- "ref": "#updateAcknowledgement",
- "description": "User's acknowledgement of the update (optional)."
- }
- }
- }
- },
- "haplogroupState": {
- "type": "object",
- "description": "Snapshot of a haplogroup assignment at a point in time.",
- "required": ["haplogroupName"],
- "properties": {
- "haplogroupName": {
- "type": "string"
- },
- "score": {
- "type": "float"
- },
- "treeDepth": {
- "type": "integer"
- },
- "lineagePath": {
- "type": "array",
- "items": { "type": "string" }
- }
- }
- },
- "updateAcknowledgement": {
- "type": "object",
- "description": "User's acknowledgement of an update.",
- "properties": {
- "acknowledgedAt": {
- "type": "string",
- "format": "datetime"
- },
- "accepted": {
- "type": "boolean",
- "description": "True if user accepts, false if they dispute."
- },
- "disputeReason": {
- "type": "string",
- "description": "Reason for disputing (if accepted=false)."
- }
- }
- }
- }
-}
-```
-
-### 2. Branch Discovery Notification (`com.decodingus.atmosphere.branchDiscovery`)
-
-Sent when a user's private variants have been promoted to an official haplogroup branch.
-
-**NSID:** `com.decodingus.atmosphere.branchDiscovery`
-
-**Author:** AppView (DecodingUs)
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.branchDiscovery",
- "defs": {
- "main": {
- "type": "record",
- "description": "Notification that private variants from a biosample have been promoted to an official branch.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["meta", "atUri", "biosampleRef", "newBranchName", "definingVariants", "discoveredAt"],
- "properties": {
- "atUri": {
- "type": "string",
- "description": "The AT URI of this discovery notification."
- },
- "meta": {
- "type": "ref",
- "ref": "com.decodingus.atmosphere.defs#recordMeta"
- },
- "biosampleRef": {
- "type": "string",
- "description": "AT URI of the biosample that contributed to the discovery."
- },
- "lineage": {
- "type": "string",
- "description": "Which lineage (Y-DNA or mtDNA).",
- "knownValues": ["Y_DNA", "MT_DNA"]
- },
- "parentBranch": {
- "type": "string",
- "description": "The parent haplogroup from which the new branch descends."
- },
- "newBranchName": {
- "type": "string",
- "description": "Name of the newly discovered branch (e.g., 'R-FT54321')."
- },
- "definingVariants": {
- "type": "array",
- "description": "The variants that define this new branch.",
- "items": {
- "type": "ref",
- "ref": "com.decodingus.atmosphere.defs#variantCall"
- }
- },
- "contributingSamples": {
- "type": "integer",
- "description": "Number of biosamples that share these variants."
- },
- "discoveredAt": {
- "type": "string",
- "format": "datetime",
- "description": "When the branch was officially added to the tree."
- },
- "curatorNotes": {
- "type": "string",
- "description": "Optional notes from the curator who approved the branch."
- },
- "citationDoi": {
- "type": "string",
- "description": "DOI of publication if branch was discovered through academic research."
- }
- }
- }
- }
- }
-}
-```
-
-### 3. Tree Version Update Notification (`com.decodingus.atmosphere.treeVersionUpdate`)
-
-Sent when the haplogroup reference tree is updated, which may affect user's assignments.
-
-**NSID:** `com.decodingus.atmosphere.treeVersionUpdate`
-
-**Author:** AppView (DecodingUs)
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.treeVersionUpdate",
- "defs": {
- "main": {
- "type": "record",
- "description": "Notification that the haplogroup reference tree has been updated.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["meta", "atUri", "lineage", "previousVersion", "newVersion", "effectiveAt"],
- "properties": {
- "atUri": {
- "type": "string",
- "description": "The AT URI of this tree update notification."
- },
- "meta": {
- "type": "ref",
- "ref": "com.decodingus.atmosphere.defs#recordMeta"
- },
- "lineage": {
- "type": "string",
- "description": "Which lineage tree was updated.",
- "knownValues": ["Y_DNA", "MT_DNA"]
- },
- "previousVersion": {
- "type": "string",
- "description": "Previous tree version (e.g., 'ISOGG-2024.12')."
- },
- "newVersion": {
- "type": "string",
- "description": "New tree version (e.g., 'ISOGG-2025.01')."
- },
- "effectiveAt": {
- "type": "string",
- "format": "datetime",
- "description": "When the new tree version became active."
- },
- "affectedBiosamples": {
- "type": "array",
- "description": "List of user's biosamples that may be affected.",
- "items": {
- "type": "ref",
- "ref": "#affectedBiosample"
- }
- },
- "changelogUrl": {
- "type": "string",
- "format": "uri",
- "description": "URL to the tree changelog/release notes."
- },
- "summary": {
- "type": "string",
- "description": "Human-readable summary of changes relevant to user."
- }
- }
- }
- },
- "affectedBiosample": {
- "type": "object",
- "description": "A biosample potentially affected by tree changes.",
- "required": ["biosampleRef", "currentHaplogroup"],
- "properties": {
- "biosampleRef": {
- "type": "string",
- "description": "AT URI of the affected biosample."
- },
- "currentHaplogroup": {
- "type": "string",
- "description": "Current haplogroup assignment."
- },
- "mayChange": {
- "type": "boolean",
- "description": "True if this biosample's assignment may change."
- },
- "suggestedAction": {
- "type": "string",
- "description": "Recommended action (e.g., 'REANALYZE' to re-analyze in Workbench).",
- "knownValues": ["NONE", "REVIEW", "REANALYZE"]
- }
- }
- }
- }
-}
-```
-
----
-
-## Collaborative Matching Model
-
-Unlike centralized DNA matching services that compute matches server-side, DecodingUs uses a **collaborative discovery** model where:
-
-1. **AppView identifies potential matches** across the network based on shared haplogroups, STR similarity, or other criteria
-2. **Users explore candidates** in their Workbench (Navigator), choosing which to investigate
-3. **Both parties must agree** on the match result before it's stamped as confirmed
-4. **Confirmed matches** are written to both users' PDS as permanent records
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ COLLABORATIVE MATCHING FLOW │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ ┌─────────────┐ ┌─────────────┐ │
-│ │ Alice's │ │ Bob's │ │
-│ │ Workbench │ │ Workbench │ │
-│ └──────┬──────┘ └──────┬──────┘ │
-│ │ │ │
-│ │ 1. AppView identifies potential match │ │
-│ │◀─────────────────────────────────────────▶│ │
-│ │ (written to both PDS as candidates) │ │
-│ │ │ │
-│ │ 2. Alice explores match in Workbench │ │
-│ │─────▶ Reviews STR comparison │ │
-│ │─────▶ Compares haplogroup branches │ │
-│ │─────▶ Initiates match confirmation │ │
-│ │ │ │
-│ │ 3. AppView notifies Bob of request │ │
-│ │──────────────────────────────────────────▶│ │
-│ │ │ │
-│ │ 4. Bob reviews and confirms match │ │
-│ │◀──────────────────────────────────────────│ │
-│ │ │ │
-│ │ 5. AppView stamps confirmed match │ │
-│ │◀─────────────────────────────────────────▶│ │
-│ │ (written to BOTH PDS) │ │
-│ │ │ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### 4. Potential Match List Record (`com.decodingus.atmosphere.potentialMatchList`)
-
-List of potential matches identified by the AppView for user exploration.
-
-**NSID:** `com.decodingus.atmosphere.potentialMatchList`
-
-**Author:** AppView (DecodingUs)
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.potentialMatchList",
- "defs": {
- "main": {
- "type": "record",
- "description": "List of potential genetic matches for user to explore in Workbench.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["meta", "atUri", "biosampleRef", "candidates"],
- "properties": {
- "atUri": {
- "type": "string",
- "description": "The AT URI of this potential match list."
- },
- "meta": {
- "type": "ref",
- "ref": "com.decodingus.atmosphere.defs#recordMeta"
- },
- "biosampleRef": {
- "type": "string",
- "description": "AT URI of the biosample these candidates relate to."
- },
- "candidateCount": {
- "type": "integer",
- "description": "Total number of potential matches."
- },
- "lastUpdatedAt": {
- "type": "string",
- "format": "datetime",
- "description": "When candidate list was last refreshed."
- },
- "candidates": {
- "type": "array",
- "description": "List of potential match candidates.",
- "items": {
- "type": "ref",
- "ref": "#matchCandidate"
- }
- }
- }
- }
- },
- "matchCandidate": {
- "type": "object",
- "description": "A potential match candidate for user exploration.",
- "required": ["candidateBiosampleRef", "matchType", "similarity"],
- "properties": {
- "candidateBiosampleRef": {
- "type": "string",
- "description": "AT URI of the potential match's biosample."
- },
- "candidateDid": {
- "type": "string",
- "description": "DID of the potential match (if they consent to visibility)."
- },
- "matchType": {
- "type": "string",
- "description": "Type of potential match.",
- "knownValues": ["Y_STR", "Y_SNP_HAPLOGROUP", "MT_HAPLOGROUP", "AUTOSOMAL_IBD"]
- },
- "similarity": {
- "type": "float",
- "description": "Similarity score (0.0-1.0) for ranking candidates."
- },
- "sharedHaplogroup": {
- "type": "string",
- "description": "Common haplogroup if Y-DNA or mtDNA match."
- },
- "geneticDistance": {
- "type": "integer",
- "description": "STR genetic distance if Y-STR match."
- },
- "estimatedRelationship": {
- "type": "string",
- "description": "Rough relationship estimate based on match type."
- },
- "identifiedAt": {
- "type": "string",
- "format": "datetime",
- "description": "When this candidate was identified."
- },
- "status": {
- "type": "string",
- "description": "Current status of this candidate.",
- "knownValues": ["NEW", "VIEWED", "EXPLORING", "PENDING_CONFIRMATION", "CONFIRMED", "DECLINED"]
- }
- }
- }
- }
-}
-```
-
-### 5. Confirmed Match Record (`com.decodingus.atmosphere.confirmedMatch`)
-
-A confirmed match stamped by the AppView after both parties agree.
-
-**NSID:** `com.decodingus.atmosphere.confirmedMatch`
-
-**Author:** AppView (DecodingUs)
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.confirmedMatch",
- "defs": {
- "main": {
- "type": "record",
- "description": "A confirmed genetic match agreed upon by both parties.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["meta", "atUri", "biosampleRef", "matchedBiosampleRef", "matchType", "confirmedAt"],
- "properties": {
- "atUri": {
- "type": "string",
- "description": "The AT URI of this confirmed match record."
- },
- "meta": {
- "type": "ref",
- "ref": "com.decodingus.atmosphere.defs#recordMeta"
- },
- "biosampleRef": {
- "type": "string",
- "description": "AT URI of this user's biosample."
- },
- "matchedBiosampleRef": {
- "type": "string",
- "description": "AT URI of the matched biosample."
- },
- "matchedCitizenDid": {
- "type": "string",
- "description": "DID of the matched citizen."
- },
- "matchType": {
- "type": "string",
- "description": "Type of confirmed match.",
- "knownValues": ["Y_STR", "Y_SNP_HAPLOGROUP", "MT_HAPLOGROUP", "AUTOSOMAL_IBD"]
- },
- "matchDetails": {
- "type": "ref",
- "ref": "#confirmedMatchDetails",
- "description": "Detailed match information based on match type."
- },
- "confirmedAt": {
- "type": "string",
- "format": "datetime",
- "description": "When both parties confirmed the match."
- },
- "initiatedBy": {
- "type": "string",
- "description": "DID of the party who initiated confirmation."
- },
- "confirmedBy": {
- "type": "string",
- "description": "DID of the party who accepted confirmation."
- },
- "notes": {
- "type": "string",
- "description": "Optional notes about the match relationship."
- }
- }
- }
- },
- "confirmedMatchDetails": {
- "type": "object",
- "description": "Detailed match metrics based on match type.",
- "properties": {
- "sharedHaplogroup": {
- "type": "string",
- "description": "Common haplogroup (Y-DNA or mtDNA matches)."
- },
- "geneticDistance": {
- "type": "integer",
- "description": "STR genetic distance (Y-STR matches)."
- },
- "tmrcaEstimate": {
- "type": "object",
- "description": "Estimated time to most recent common ancestor.",
- "properties": {
- "generations": { "type": "integer" },
- "yearsBeforePresent": { "type": "integer" },
- "confidenceInterval": {
- "type": "object",
- "properties": {
- "lower": { "type": "integer" },
- "upper": { "type": "integer" }
- }
- }
- }
- },
- "sharedCm": {
- "type": "float",
- "description": "Total shared centiMorgans (autosomal IBD matches)."
- },
- "segmentCount": {
- "type": "integer",
- "description": "Number of shared segments (autosomal IBD matches)."
- },
- "relationshipEstimate": {
- "type": "string",
- "description": "Estimated relationship based on match data.",
- "knownValues": ["PARENT_CHILD", "SIBLING", "GRANDPARENT", "AUNT_UNCLE", "1ST_COUSIN",
- "2ND_COUSIN", "3RD_COUSIN", "4TH_COUSIN", "DISTANT", "UNKNOWN"]
- }
- }
- }
- }
-}
-```
-
-### 6. Sync Status Record (`com.decodingus.atmosphere.syncStatus`)
-
-A record in the user's PDS tracking the sync state with the AppView.
-
-**NSID:** `com.decodingus.atmosphere.syncStatus`
-
-**Author:** AppView (DecodingUs)
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.syncStatus",
- "defs": {
- "main": {
- "type": "record",
- "description": "Tracks synchronization status between user's PDS and the AppView.",
- "key": "literal:self",
- "record": {
- "type": "object",
- "required": ["meta", "atUri", "lastSyncAt", "appViewVersion"],
- "properties": {
- "atUri": {
- "type": "string",
- "description": "The AT URI of this sync status record."
- },
- "meta": {
- "type": "ref",
- "ref": "com.decodingus.atmosphere.defs#recordMeta"
- },
- "lastSyncAt": {
- "type": "string",
- "format": "datetime",
- "description": "Last successful sync with AppView."
- },
- "appViewVersion": {
- "type": "string",
- "description": "Version of the DecodingUs AppView."
- },
- "treeVersions": {
- "type": "object",
- "description": "Current haplogroup tree versions used.",
- "properties": {
- "yDna": { "type": "string" },
- "mtDna": { "type": "string" }
- }
- },
- "pendingUpdates": {
- "type": "integer",
- "description": "Number of pending updates to be applied."
- },
- "unacknowledgedNotifications": {
- "type": "integer",
- "description": "Number of notifications user hasn't acknowledged."
- },
- "biosampleSyncStates": {
- "type": "array",
- "description": "Per-biosample sync status.",
- "items": {
- "type": "ref",
- "ref": "#biosampleSyncState"
- }
- }
- }
- }
- },
- "biosampleSyncState": {
- "type": "object",
- "description": "Sync state for a single biosample.",
- "required": ["biosampleRef", "status"],
- "properties": {
- "biosampleRef": {
- "type": "string",
- "description": "AT URI of the biosample."
- },
- "status": {
- "type": "string",
- "description": "Current sync status.",
- "knownValues": ["SYNCED", "PENDING_UPDATE", "UPDATE_AVAILABLE", "CONFLICT", "ERROR"]
- },
- "lastUpdatedAt": {
- "type": "string",
- "format": "datetime"
- },
- "pendingFields": {
- "type": "array",
- "description": "Fields with pending updates.",
- "items": { "type": "string" }
- }
- }
- }
- }
-}
-```
-
----
-
-## Backfeed Authorization Model
-
-### AppView Service Account
-
-The DecodingUs AppView operates as a service account with delegated write access to user PDS records.
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Authorization Flow │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ 1. User authenticates with Navigator or Web UI │
-│ 2. User grants "AppView Write" scope to DecodingUs │
-│ 3. DecodingUs receives delegated credential (DPoP-bound access token) │
-│ 4. AppView uses credential to write backfeed records to user's PDS │
-│ │
-│ ┌─────────────────────┐ │
-│ │ User PDS │ │
-│ │ │ │
-│ │ Scopes granted to │ │
-│ │ DecodingUs: │ │
-│ │ │ │
-│ │ ✓ read:biosample │ (read user's biosamples) │
-│ │ ✓ write:potentialMatches │ (write potential match candidates) │
-│ │ ✓ write:confirmedMatch │ (stamp confirmed matches) │
-│ │ ✓ write:notification │ (write update notifications) │
-│ │ ✓ update:biosample │ (update haplogroup fields) │
-│ │ │ │
-│ └─────────────────────┘ │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### Scope Definitions
-
-| Scope | Allows |
-|:---|:---|
-| `com.decodingus.atmosphere:read` | Read all Atmosphere records |
-| `com.decodingus.atmosphere:write:potentialMatches` | Create/update potential match candidate lists |
-| `com.decodingus.atmosphere:write:confirmedMatch` | Stamp confirmed matches when both parties agree |
-| `com.decodingus.atmosphere:write:notification` | Create notification records (updates, discoveries) |
-| `com.decodingus.atmosphere:update:biosample` | Update specific fields on biosample records |
-| `com.decodingus.atmosphere:write:syncStatus` | Maintain sync status record |
-
-### Consent Flow
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ AppView Consent Dialog (shown in Navigator or Web) │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ DecodingUs AppView is requesting access to your PDS: │
-│ │
-│ ┌─ Requested Permissions ────────────────────────────────────────────┐ │
-│ │ │ │
-│ │ ☑ Read your biosample records │ │
-│ │ Allow DecodingUs to read your genomic metadata │ │
-│ │ │ │
-│ │ ☑ Update your haplogroup assignments │ │
-│ │ Automatically apply refined haplogroups when tree updates │ │
-│ │ │ │
-│ │ ☑ Write potential match candidates │ │
-│ │ Notify you of potential genetic matches to explore │ │
-│ │ │ │
-│ │ ☑ Stamp confirmed matches │ │
-│ │ Record matches when both you and your match agree │ │
-│ │ │ │
-│ │ ☑ Send update notifications │ │
-│ │ Notify you when your data is updated │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ [Grant Access] [Deny] │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
----
-
-## Backfeed Processing Pipeline
-
-### Architecture
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ DecodingUs Backend (AppView) │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ Event Sources │ │
-│ ├──────────────────┬──────────────────┬───────────────────────────┤ │
-│ │ Tree Update Job │ Match Discovery │ Analysis Pipeline │ │
-│ │ (scheduled) │ (network scan) │ (on file upload) │ │
-│ └────────┬─────────┴────────┬─────────┴─────────────┬─────────────┘ │
-│ │ │ │ │
-│ ▼ ▼ ▼ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ Backfeed Event Queue │ │
-│ │ (Kafka topic: decodingus.backfeed.events) │ │
-│ └──────────────────────────────┬──────────────────────────────────┘ │
-│ │ │
-│ ▼ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ Backfeed Processor Service │ │
-│ │ │ │
-│ │ 1. Retrieve user's delegated credential │ │
-│ │ 2. Build appropriate Lexicon record │ │
-│ │ 3. Write record to user's PDS │ │
-│ │ 4. Update local sync tracking │ │
-│ │ 5. Handle failures with retry │ │
-│ │ │ │
-│ └──────────────────────────────┬──────────────────────────────────┘ │
-│ │ │
-│ ▼ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ PDS Write Client │ │
-│ │ (AT Protocol XRPC: com.atproto.repo.createRecord/putRecord) │ │
-│ └─────────────────────────────────────────────────────────────────┘ │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
- │
- ▼
- ┌──────────────────────────┐
- │ User's PDS │
- │ - Updated records │
- │ - New notifications │
- │ - Sync status │
- └──────────────────────────┘
-```
-
-### Event Types
-
-```scala
-sealed trait BackfeedEvent {
- def citizenDid: String
- def biosampleAtUri: String
- def priority: BackfeedPriority
-}
-
-case class HaplogroupRefinementEvent(
- citizenDid: String,
- biosampleAtUri: String,
- lineage: Lineage,
- previousHaplogroup: String,
- newHaplogroup: String,
- reason: HaplogroupUpdateReason,
- treeVersion: String,
- priority: BackfeedPriority = BackfeedPriority.Normal
-) extends BackfeedEvent
-
-case class BranchDiscoveryEvent(
- citizenDid: String,
- biosampleAtUri: String,
- lineage: Lineage,
- newBranchName: String,
- parentBranch: String,
- definingVariantIds: Seq[Int],
- contributingSampleCount: Int,
- priority: BackfeedPriority = BackfeedPriority.High
-) extends BackfeedEvent
-
-case class PotentialMatchesEvent(
- citizenDid: String,
- biosampleAtUri: String,
- candidateCount: Int,
- newCandidates: Int,
- removedCandidates: Int,
- priority: BackfeedPriority = BackfeedPriority.Normal
-) extends BackfeedEvent
-
-case class ConfirmedMatchEvent(
- citizenDid: String,
- biosampleAtUri: String,
- matchedCitizenDid: String,
- matchedBiosampleAtUri: String,
- sharedCm: Float,
- segmentCount: Int,
- confirmedAt: Instant,
- priority: BackfeedPriority = BackfeedPriority.High
-) extends BackfeedEvent
-
-case class AnalysisCompleteEvent(
- citizenDid: String,
- biosampleAtUri: String,
- analysisType: AnalysisType,
- updatedRecordAtUri: String,
- pipelineVersion: String,
- priority: BackfeedPriority = BackfeedPriority.Low
-) extends BackfeedEvent
-
-enum BackfeedPriority:
- case High // Branch discovery, major haplogroup change, confirmed match
- case Normal // Regular updates, potential matches
- case Low // Analysis reruns, minor updates
-
-enum HaplogroupUpdateReason:
- case TreeUpdate // Reference tree was updated
- case BranchDiscovery // New branch added from consensus
- case Correction // Manual curator correction
- case Reclassification // Nomenclature change
- case RefinedAnalysis // Better analysis with same data
-```
-
-### Processing Logic
-
-```scala
-class BackfeedProcessorService(
- pdsClient: PdsWriteClient,
- credentialStore: DelegatedCredentialStore,
- syncTracker: SyncTracker
-) {
-
- def processEvent(event: BackfeedEvent): Future[BackfeedResult] = {
- for {
- // 1. Get user's delegated credential
- credential <- credentialStore.getCredential(event.citizenDid)
- .flatMap {
- case Some(cred) if cred.isValid => Future.successful(cred)
- case Some(cred) => refreshCredential(cred)
- case None => Future.failed(NoCredentialException(event.citizenDid))
- }
-
- // 2. Build the appropriate record(s)
- records <- buildRecords(event)
-
- // 3. Write to user's PDS
- results <- Future.traverse(records) { record =>
- pdsClient.writeRecord(
- credential = credential,
- collection = record.collection,
- record = record.data,
- rkey = record.rkey
- )
- }
-
- // 4. Update local sync tracking
- _ <- syncTracker.recordBackfeed(event, results)
-
- // 5. Update user's syncStatus record
- _ <- updateSyncStatus(credential, event.citizenDid)
-
- } yield BackfeedResult.Success(results.map(_.atUri))
- }
-
- private def buildRecords(event: BackfeedEvent): Future[Seq[BackfeedRecord]] = {
- event match {
- case e: HaplogroupRefinementEvent =>
- for {
- // Create notification record
- notification <- buildHaplogroupUpdateNotification(e)
- // Optionally update biosample directly if user consented
- biosampleUpdate <- if (autoUpdateEnabled(e.citizenDid)) {
- buildBiosampleHaplogroupUpdate(e).map(Some(_))
- } else Future.successful(None)
- } yield Seq(notification) ++ biosampleUpdate.toSeq
-
- case e: BranchDiscoveryEvent =>
- buildBranchDiscoveryNotification(e).map(Seq(_))
-
- case e: PotentialMatchesEvent =>
- buildPotentialMatchesRecord(e).map(Seq(_))
-
- case e: ConfirmedMatchEvent =>
- // Stamp confirmed match in BOTH parties' PDS
- for {
- record1 <- buildConfirmedMatchRecord(e, e.citizenDid)
- record2 <- buildConfirmedMatchRecord(e, e.matchedCitizenDid)
- } yield Seq(record1, record2)
-
- case e: AnalysisCompleteEvent =>
- buildAnalysisUpdateNotification(e).map(Seq(_))
- }
- }
-}
-```
-
----
-
-## Conflict Resolution
-
-### Scenario: Local and Remote Changes
-
-When Navigator syncs a locally modified biosample that the AppView has also updated:
-
-```
-Timeline:
-─────────────────────────────────────────────────────────────────────────
- t1: User syncs biosample with haplogroup R-L21 (atCid: abc123)
- t2: AppView refines to R-L21>FT54321, writes to PDS (atCid: def456)
- t3: User edits description locally (still has atCid: abc123)
- t4: User attempts sync → CONFLICT (atCid mismatch)
-─────────────────────────────────────────────────────────────────────────
-```
-
-### Resolution Strategy
-
-```scala
-enum ConflictResolutionStrategy:
- case AppViewWins // AppView-computed fields always win
- case UserWins // User's local changes always win
- case FieldLevel // Merge at field level
- case Manual // Require user decision
-
-val fieldResolutionRules: Map[String, ConflictResolutionStrategy] = Map(
- // AppView-computed fields - AppView always wins
- "haplogroups.yDna.haplogroupName" -> ConflictResolutionStrategy.AppViewWins,
- "haplogroups.yDna.score" -> ConflictResolutionStrategy.AppViewWins,
- "haplogroups.yDna.lineagePath" -> ConflictResolutionStrategy.AppViewWins,
- "haplogroups.mtDna.haplogroupName" -> ConflictResolutionStrategy.AppViewWins,
- "haplogroups.mtDna.score" -> ConflictResolutionStrategy.AppViewWins,
- "haplogroups.mtDna.lineagePath" -> ConflictResolutionStrategy.AppViewWins,
-
- // User-editable fields - User wins
- "description" -> ConflictResolutionStrategy.UserWins,
- "alias" -> ConflictResolutionStrategy.UserWins,
- "donorIdentifier" -> ConflictResolutionStrategy.UserWins,
-
- // Complex fields - Manual resolution
- "haplogroups.yDna.privateVariants" -> ConflictResolutionStrategy.Manual
-)
-```
-
-### Navigator Conflict UI
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Sync Conflict Detected │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ Biosample: VIK-003 │
-│ │
-│ Your local version and the AppView version have both changed. │
-│ │
-│ ┌─ Automatic Resolution ─────────────────────────────────────────────┐ │
-│ │ │ │
-│ │ ✓ Y-DNA Haplogroup: Using AppView value │ │
-│ │ Local: R-L21 AppView: R-L21>FT54321 │ │
-│ │ (AppView-computed fields always use latest refinement) │ │
-│ │ │ │
-│ │ ✓ Description: Using your local value │ │
-│ │ Local: "Updated analysis notes" │ │
-│ │ AppView: "Deep WGS of Proband" │ │
-│ │ (User-editable fields preserve your changes) │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Requires Your Decision ───────────────────────────────────────────┐ │
-│ │ │ │
-│ │ ⚠ Private Variants │ │
-│ │ │ │
-│ │ Local version has 5 private variants │ │
-│ │ AppView version has 3 (2 were promoted to R-L21>FT54321) │ │
-│ │ │ │
-│ │ ( ) Keep my 5 private variants │ │
-│ │ (•) Accept AppView's 3 (2 are now part of official branch) │ │
-│ │ ( ) Review each variant individually │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ [Apply Resolution] [Cancel] │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
----
-
-## Notification Aggregation
-
-To avoid overwhelming users with individual notifications, the AppView aggregates updates:
-
-### Daily Digest Record (`com.decodingus.atmosphere.updateDigest`)
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.updateDigest",
- "defs": {
- "main": {
- "type": "record",
- "description": "Daily digest of all updates for a user's biosamples.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["meta", "atUri", "periodStart", "periodEnd", "summary"],
- "properties": {
- "atUri": { "type": "string" },
- "meta": { "type": "ref", "ref": "com.decodingus.atmosphere.defs#recordMeta" },
- "periodStart": { "type": "string", "format": "datetime" },
- "periodEnd": { "type": "string", "format": "datetime" },
- "summary": {
- "type": "object",
- "properties": {
- "haplogroupUpdates": { "type": "integer" },
- "branchDiscoveries": { "type": "integer" },
- "newPotentialMatches": { "type": "integer" },
- "confirmedMatches": { "type": "integer" },
- "analysisUpdates": { "type": "integer" }
- }
- },
- "updateRefs": {
- "type": "array",
- "description": "AT URIs of individual update notifications.",
- "items": { "type": "string" }
- },
- "highlights": {
- "type": "array",
- "description": "Most significant updates to call out.",
- "items": { "type": "ref", "ref": "#digestHighlight" }
- }
- }
- }
- },
- "digestHighlight": {
- "type": "object",
- "properties": {
- "type": { "type": "string", "knownValues": ["BRANCH_DISCOVERY", "CONFIRMED_MATCH", "HAPLOGROUP_REFINEMENT", "NEW_POTENTIAL_MATCH"] },
- "biosampleRef": { "type": "string" },
- "message": { "type": "string" }
- }
- }
- }
-}
-```
-
----
-
-## Implementation Phases
-
-### Phase 1: Notification Infrastructure
-- Implement `haplogroupUpdate` and `branchDiscovery` notification records
-- Build backfeed event queue and processor
-- Establish delegated credential storage and management
-- Create basic Navigator UI for viewing notifications
-
-### Phase 2: Collaborative Matching
-- Implement `potentialMatchList` record for match candidates
-- Add `confirmedMatch` record stamping when both parties agree
-- Build match exploration UI in Navigator Workbench
-- Implement match confirmation workflow
-
-### Phase 3: Direct Record Updates
-- Implement `biosample.haplogroups` field updates with user consent
-- Implement conflict resolution logic
-- Extend Navigator sync to handle AppView-modified records
-
-### Phase 4: Full Sync Loop
-- Add `syncStatus` record management
-- Implement `updateDigest` for daily summaries
-- Build comprehensive Navigator sync dashboard
-- Add push notifications (optional)
-
-### Phase 5: Advanced Features
-- Real-time WebSocket updates for immediate notification
-- Selective sync (user can pause certain update types)
-- Audit log accessible to users
-- Dispute workflow for incorrect haplogroup assignments
-
----
-
-## Database Schema Additions
-
-```sql
--- Track delegated credentials for PDS write access
-CREATE TABLE pds_delegated_credential (
- id SERIAL PRIMARY KEY,
- citizen_did TEXT NOT NULL UNIQUE,
- access_token TEXT NOT NULL,
- refresh_token TEXT,
- token_type TEXT DEFAULT 'DPoP',
- scopes TEXT[] NOT NULL,
- expires_at TIMESTAMP NOT NULL,
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW()
-);
-
--- Track backfeed events sent to user PDS
-CREATE TABLE backfeed_event_log (
- id SERIAL PRIMARY KEY,
- citizen_did TEXT NOT NULL,
- biosample_at_uri TEXT NOT NULL,
- event_type TEXT NOT NULL,
- event_payload JSONB NOT NULL,
- record_at_uri TEXT,
- status TEXT DEFAULT 'PENDING', -- PENDING, SENT, FAILED, RETRYING
- attempts INT DEFAULT 0,
- last_attempt_at TIMESTAMP,
- error_message TEXT,
- created_at TIMESTAMP DEFAULT NOW(),
- sent_at TIMESTAMP
-);
-
-CREATE INDEX idx_backfeed_event_status ON backfeed_event_log(status);
-CREATE INDEX idx_backfeed_event_citizen ON backfeed_event_log(citizen_did);
-
--- Track sync state per biosample
-CREATE TABLE biosample_sync_state (
- id SERIAL PRIMARY KEY,
- citizen_did TEXT NOT NULL,
- biosample_at_uri TEXT NOT NULL,
- local_at_cid TEXT,
- appview_at_cid TEXT,
- last_user_update TIMESTAMP,
- last_appview_update TIMESTAMP,
- sync_status TEXT DEFAULT 'SYNCED', -- SYNCED, PENDING_USER, PENDING_APPVIEW, CONFLICT
- pending_fields TEXT[],
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW(),
-
- UNIQUE(citizen_did, biosample_at_uri)
-);
-```
-
----
-
-## Open Questions
-
-1. **Notification Retention**: How long should notification records be kept in a user's PDS?
-
-2. **Credential Refresh**: How should expired credentials be handled when a user hasn't connected Navigator in months?
-
-3. **Rate Limiting**: What limits should apply to AppView writes to prevent abuse?
-
-4. **User Preferences**: Should users be able to opt out of specific update types?
-
-5. **Offline Users**: How should updates be queued for users whose PDS is temporarily unreachable?
-
-6. **Multi-AppView**: If a user grants access to multiple AppViews, how should their writes be coordinated?
-
-7. **Match Confirmation Timeout**: How long should a match confirmation request remain pending before expiring?
-
-8. **Potential Match Criteria**: What thresholds (STR distance, shared haplogroup depth) qualify someone as a potential match?
-
----
-
-## Related Documents
-
-- [Atmosphere Lexicon Design](../Atmosphere_Lexicon.md) - Base record schemas
-- [PDS Workbench Biosample Flow](../proposals/pds-workbench-biosample-flow.md) - Forward flow design
-- [Haplogroup Discovery System](./haplogroup-discovery-system.md) - Branch discovery triggers
-- [IBD Matching System](./ibd-matching-system.md) - Potential match identification criteria
diff --git a/documents/planning/d1-encrypted-edge-exchange.md b/documents/planning/d1-encrypted-edge-exchange.md
new file mode 100644
index 00000000..8bbe146d
--- /dev/null
+++ b/documents/planning/d1-encrypted-edge-exchange.md
@@ -0,0 +1,282 @@
+# D1 — Encrypted Edge-to-Edge Exchange + AppView Broker
+
+**Status:** Design (v0, 2026-06-06). The shared foundation in the AppView roadmap
+(`design-roadmap-rust-rewrite.md` §5). **Cross-repo:** specifies both the AppView
+**broker** (decodingus) and the Navigator/Edge **exchange endpoint** (DUNavigator).
+**Supersedes/generalizes** the crypto + Edge-coordination sections of the
+original IBD requirements (now folded into D3), and is the substrate the Navigator FTDNA design
+(`ftdna-project-import.md` §8) calls for.
+
+## 1. Purpose
+
+One encrypted, consent-gated, peer-to-peer exchange substrate that carries **any**
+sensitive payload between two AT-Proto identities (DIDs), with AppView acting only
+as a **broker** that never sees plaintext. Two consumers at launch:
+
+- **IBD comparison** — exchange encrypted variant positions / segment boundaries for
+ Edge-to-Edge IBD detection (the original IBD use; see D3).
+- **Genealogy PII** — exchange member names, MDKA, kit↔subject linkage, and
+ PII-bearing assertions between **co-admins** of a project (FTDNA platform, §8).
+
+Both are the same problem: *get sensitive data from one Edge to another, with mutual
+consent, without any server holding it.* Build the channel once.
+
+## 2. Invariants (non-negotiable)
+
+1. **AppView never sees plaintext.** It brokers discovery, consent, key-exchange
+ messages, and (optionally) relays **ciphertext only**. No PII, no genetic data,
+ no session keys at rest on AppView — ever. (Preserves the "anonymized-only"
+ posture; roadmap §3.)
+2. **Dual consent precedes any key exchange.** Both DIDs must sign a consent record;
+ the broker verifies **both signatures** before notifying either Edge to begin.
+3. **Forward secrecy.** Every session uses **ephemeral** ECDH keys; compromise of a
+ long-term key does not decrypt past sessions.
+4. **Verifiable peer identity.** Session keys are bound to each peer's **DID
+ identity key** (Ed25519), so a peer cannot be impersonated and AppView cannot
+ MITM (it never holds a usable key).
+5. **Plaintext at rest only on the Edge, encrypted.** Received PII/variants are
+ stored locally (Navigator SQLite), encrypted at rest; never re-uploaded.
+6. **Least metadata.** The broker learns *that* A and B exchanged, when, and rough
+ size — the same social-graph metadata it already has from match requests. It
+ learns nothing about content. Padding/batching mitigations in §11.
+
+## 3. Role split
+
+| | **Edge (Navigator)** | **Broker (AppView)** |
+| --- | --- | --- |
+| Holds plaintext (PII/variants) | ✅ local, encrypted | ❌ never |
+| Long-term identity key (Ed25519) | ✅ (via PDS/DID) | verifies signatures only |
+| Ephemeral session keys (X25519) | ✅ generates/rotates | ❌ |
+| Discovery / intent | consumes suggestions | ✅ generates (IBD suggestions; project co-membership) |
+| Consent records | signs, writes to PDS | ✅ mirrors + **verifies dual-signature** |
+| Key-exchange messages | sends/receives | ✅ **relays** (opaque) |
+| Ciphertext payload | encrypts/decrypts | ✅ **blind relay** (store-and-forward) or ❌ (direct P2P) |
+| Post-exchange action | IBD: compute+attest · Genealogy: decrypt+fold locally | indexes attestations (IBD) / records exchange-occurred |
+
+This is the IBD doc's split (§ "Edge App Responsibilities" / "DecodingUs
+Responsibilities"), generalized beyond IBD.
+
+## 4. Cryptographic suite
+
+Reaffirms the IBD spec, with the identity-binding gap fixed:
+
+```
+Identity / signatures: Ed25519 (AT Proto DID key; already in du-atproto)
+Key agreement: X25519 ECDH (NEW — add x25519-dalek)
+Session key derivation: HKDF-SHA-256
+Payload encryption: AES-256-GCM (AEAD; per-message random 96-bit IV)
+Integrity / summaries: SHA-256
+```
+
+**The identity-binding fix.** The IBD doc says "keys derived from PDS signing keys
+(verifiable)" — but the DID identity key is **Ed25519 (a signing key); it cannot do
+ECDH**. Resolution: each participant publishes a **static X25519 exchange public
+key** as an **Ed25519-signed PDS record** (`com.decodingus.exchange.publicKey`). The
+signature ties the X25519 key to the DID, so a peer fetches it, verifies the
+signature against the DID's identity key (`du-atproto::signature::verify_did_key`),
+and trusts it. (Do **not** birationally map Ed25519→X25519; publish a dedicated key.)
+
+**Per-session handshake (X3DH-lite, gives forward secrecy):**
+- Each peer holds: static `IK_x25519` (published, signed) + a fresh **ephemeral**
+ `EK_x25519` per session.
+- Shared secret `= ECDH(IK_A, EK_B) ‖ ECDH(EK_A, IK_B) ‖ ECDH(EK_A, EK_B)` →
+ `HKDF-SHA-256` → a session key. Static×ephemeral binds identity; ephemeral×
+ ephemeral gives forward secrecy.
+- Session key encrypts payloads with AES-256-GCM (fresh IV per message; `seq`
+ counter in AAD to order/dedupe). Keys **rotated per session** (IBD doc).
+
+## 5. Handshake & session state machine (generic)
+
+```
+ Edge A AppView (broker) Edge B
+ │ 1. intent (suggestion / co-membership) │ │
+ │◀──────────────────────────────────────────│ │
+ │ 2. exchange_request (signed PDS record) │ │
+ │───────────────────────────────────────────▶ mirror + notify B │
+ │ │──────────────────────▶│
+ │ │ 3. consent (signed) │
+ │ │◀──────────────────────│
+ │ verify BOTH signatures (dual-consent gate) │
+ │ 4. exchange-ready {partnerDid, partnerExchangeKeyUri} │
+ │◀───────────────────────────────────────────────────────────────▶│
+ │ 5. ECDH: publish/fetch static keys, swap ephemeral EK (relayed) │
+ │◀───────────────── key-exchange messages (opaque) ───────────────▶│
+ │ 6. encrypted payload ── blind relay (ciphertext) ──▶ │
+ │ │──────────────────────▶│
+ │ │ (B decrypts locally) │
+ │ 7a. IBD: B computes, both sign + attest → AppView indexes │
+ │ 7b. Genealogy: B folds PII locally; ack (exchange-occurred) │
+```
+
+Steps 1–4 are the broker's job (PII-free); steps 5–7 are Edge-to-Edge (opaque to the
+broker). The state machine generalizes IBD's Phase 1–4 (per D3):
+**intent → request → dual-consent → exchange-ready → ECDH → encrypted
+exchange → attest/ack.**
+
+## 6. Transport — DECIDED (2026-06-06): AppView-hosted blind relay primary, direct P2P later
+
+Edges are **desktop apps that are rarely online simultaneously**, and live behind
+NAT. So:
+
+> **Default: AppView-hosted blind store-and-forward relay.** The sender posts an
+> opaque envelope (ciphertext + minimal routing header) to the broker; it is held
+> until the recipient pulls it, then **deleted on ack** (or on TTL). AppView can
+> read **none** of it — it sees `{from_did, to_did, session_id, seq, size,
+> created_at}` and an opaque blob. This is consistent with Invariant 1: a transport
+> buffer of ciphertext is **not** a PII store.
+
+- **Why relay, not PDS-as-mailbox:** putting the ciphertext in a public AT-Proto
+ record would leak the *envelope metadata to the whole network*; the relay keeps it
+ within AppView, which already knows the social graph from consent records. Relay
+ also handles offline peers and TTL cleanly.
+- **Why relay, not direct P2P (for now):** direct P2P (QUIC/WebRTC + NAT traversal)
+ needs both peers online and a signaling/TURN path — more moving parts for the
+ common "other admin is offline" case. **Direct P2P is a later optimization** for
+ large payloads when both are online; the relay remains the fallback.
+- Relay caps: per-envelope size limit, TTL (e.g. 7 days), at-rest encryption of the
+ blob on AppView (defense-in-depth; it's already ciphertext), rate limits.
+
+*(Resolved — §12 Q1: relay-primary confirmed; P2P-primary was the alternative considered.)*
+
+## 7. The generic envelope & payload families
+
+```
+ExchangeEnvelope {
+ session_id: UUID,
+ seq: u64, // ordering / replay guard (in AEAD AAD)
+ purpose: 'IBD_AUTOSOMAL' | 'IBD_Y' | 'IBD_MT' | 'GENEALOGY_PII' | ...,
+ payload_type:'VARIANT_POSITIONS' | 'SEGMENT_BOUNDARIES'
+ | 'SUBJECT_BUNDLE' | 'PII_ASSERTION',
+ iv: [u8;12],
+ ciphertext: Vec<u8>, // AES-256-GCM
+ auth_tag: [u8;16],
+}
+```
+
+**Payload families (plaintext shapes, defined per consumer):**
+- **IBD** — `VARIANT_POSITIONS` / `SEGMENT_BOUNDARIES` (unchanged from the IBD doc).
+- **Genealogy** — `SUBJECT_BUNDLE`: `{ subject_id, external_ids[] (kit#…), member_name,
+ mdka[], notes }` (the PII the FTDNA importer holds, §4.2/§4.3 of the Navigator
+ doc); `PII_ASSERTION`: a single scoped assertion whose value contains PII
+ (`mdka_is`, `note`-with-name). These are exactly the records the Navigator design
+ routes to "encrypted P2P only" (§8.4).
+
+The envelope is payload-agnostic; consumers register a `purpose`/`payload_type` and a
+post-decrypt handler.
+
+## 8. Broker schema (PII-free) — generalize `ibd.match_*` into `exchange.*`
+
+The existing `ibd.match_request` / `ibd.match_consent` (mig 0007) are the IBD-specific
+seed. Generalize to a purpose-tagged `exchange` schema that IBD and genealogy share;
+IBD's tables become a specialization (or a view) keyed by `purpose='IBD_*'`.
+
+```
+exchange.exchange_request (
+ request_uri TEXT PRIMARY KEY, -- at:// URI of the signed PDS record
+ initiator_did TEXT NOT NULL,
+ partner_did TEXT NOT NULL,
+ purpose TEXT NOT NULL, -- IBD_* | GENEALOGY_PII
+ scope TEXT, -- e.g. 'project:<id>' (consent boundary)
+ status TEXT NOT NULL, -- PENDING/CONSENTED/DECLINED/CANCELLED/EXPIRED
+ details JSONB NOT NULL DEFAULT '{}', created_at, updated_at
+);
+exchange.exchange_consent (
+ id, request_uri REFERENCES exchange_request, consenting_did, consent_given BOOL,
+ consent_uri TEXT, signature TEXT NOT NULL, created_at -- both sigs verified
+);
+exchange.exchange_session (
+ session_id UUID PRIMARY KEY, request_uri REFERENCES exchange_request,
+ status TEXT, -- ESTABLISHING/ACTIVE/COMPLETE/EXPIRED
+ created_at, expires_at
+);
+exchange.relay_envelope ( -- the blind buffer; ciphertext only
+ id, session_id REFERENCES exchange_session, from_did, to_did, seq INT,
+ size_bytes INT, blob BYTEA NOT NULL, -- opaque AES-GCM ciphertext envelope
+ created_at, expires_at, delivered_at -- deleted on ack or TTL
+);
+exchange.exchange_publickey ( -- mirror of the published, signed X25519 key
+ did TEXT PRIMARY KEY, x25519_pub BYTEA NOT NULL, key_uri TEXT, sig_verified_at
+);
+```
+
+**Note:** `relay_envelope.blob` holds **ciphertext only**; storing it does **not**
+violate Invariant 1 (AppView cannot decrypt it; it isn't a PII row). IBD's
+`ibd_discovery_index` / `ibd_pds_attestation` keep their existing roles (attestation
+indexing) downstream of a completed session.
+
+## 9. Code placement
+
+- **New shared crate `du-exchange`** (in `decodingus-shared`, used by Navigator and
+ the eventual Edge/IBD logic): X25519 (`x25519-dalek`), HKDF-SHA-256, AES-256-GCM
+ (`aes-gcm`), the `ExchangeEnvelope` (de)serialization, the X3DH-lite session
+ derivation, and the published-key record format. Pure Rust, no PII knowledge.
+- **`du-atproto`** already provides Ed25519 signing/verification + DID resolution —
+ reused for the signed key record and consent signatures (no change beyond adding
+ the key-record helpers).
+- **Navigator `navigator-sync`** gains the Edge endpoint: publish/fetch the exchange
+ key record (via `PdsClient`), the relay client (post/pull/ack envelopes against the
+ broker), and the session driver. Builds on the existing `Session`/`PdsClient`.
+- **AppView `du-web` + `du-db`**: the `exchange.*` query module + broker endpoints
+ (request mirror, dual-consent verify, exchange-ready notify, relay post/pull/ack).
+
+## 10. How the two consumers specialize
+
+| | **IBD** | **Genealogy PII** |
+| --- | --- | --- |
+| Intent source | `match_suggestion` (shared haplogroup/pop overlap) | shared **project co-membership** (admin team) |
+| `purpose` | `IBD_AUTOSOMAL`/`IBD_Y`/`IBD_MT` | `GENEALOGY_PII` |
+| Payload | variant positions / segment boundaries | `SUBJECT_BUNDLE` / `PII_ASSERTION` |
+| Post-decrypt | both compute IBD, hash, **sign + attest** → AppView indexes match | recipient **folds PII into local store**; ack only (no server index) |
+| Server record | `ibd_discovery_index` (match summary, PII-free) | none — exchange is private; only `current_view` of **non-PII** assertions (§8.2) |
+
+Same channel; different intent trigger and post-decrypt handler.
+
+## 11. Threat model & residual metadata
+
+- **Honest-but-curious AppView:** sees the social graph (who exchanged, when, size) —
+ identical to what consent records already reveal — and opaque ciphertext. Cannot
+ read content, cannot MITM (no usable key; static keys are DID-signed). Acceptable
+ given it already brokers matches.
+- **Mitigations (later):** envelope **padding** to fixed size buckets; **batching**/
+ cover traffic to blur timing; short relay TTL + delete-on-ack to minimize the
+ at-rest window.
+- **Replay/reorder:** `seq` in AEAD AAD + session expiry.
+- **Malicious peer:** can lie about *content* (e.g. a wrong MDKA) — out of scope for
+ the channel; handled at the assertion/provenance layer (§8.4) where claims are
+ attributed and disputable. The channel guarantees *who* and *confidentiality*, not
+ *truth*.
+- **Compromised Edge:** plaintext at rest is encrypted; key material in OS keychain
+ (as the OAuth tokens already are, `navigator-sync`).
+
+## 12. Open questions / decisions
+
+1. ~~Transport~~ **DECIDED: blind-relay-primary** (§6); direct P2P is a later
+ large-payload optimization.
+2. ~~Relay host~~ **DECIDED: AppView-hosted blind relay** (ciphertext + routing
+ metadata only, delete-on-ack/TTL).
+3. ~~Generalize now vs. IBD-first~~ **DECIDED: introduce `exchange.*` now**; IBD's
+ eventual impl rides it, `ibd.match_*` folds in.
+4. **Static-key rotation/revocation** — lifetime of the published X25519 key; revoke
+ by superseding the signed record. Define a rotation policy.
+5. **Padding/cover-traffic** — in v1 or deferred? (Recommend: fixed-bucket padding in
+ v1; cover traffic later.)
+6. **Group exchange** — a project has *N* co-admins; is exchange pairwise (N²) or is
+ there a group-key optimization? Pairwise for v1; revisit for large admin teams.
+
+## 13. Next step
+
+§12 Q1–Q3 decided (relay-primary, AppView-hosted, generalize-now). **AppView broker
+BUILT (2026-06-12):** `exchange.*` schema (mig 0032; `ibd.match_*` folded/dropped) +
+`du_db::exchange` (publish/fetch key, request, **dual-consent gate** → session,
+pending, blind relay post/pull/ack, TTL expire) + `du-web` `/api/v1/exchange/*`
+endpoints, all **signature-authenticated** (`du_atproto::verify_did_key`; `did:key`
+direct, `did:plc/web` resolved — no OAuth/cookie) + `du-jobs exchange-expire`. The
+broker is PII-free (Invariants 1/2/4/6 hold: verifies signatures, never sees plaintext
+or keys, relays opaque ciphertext). Memory `exchange-broker`.
+
+**Remaining for end-to-end:** the **`du-exchange` crate** (X25519 + AEAD + envelope +
+X3DH-lite) in `decodingus-shared` and the **Navigator relay client + session driver**
+(DUNavigator repo) — neither is an AppView concern. Proven end-to-end by a
+`GENEALOGY_PII` `SUBJECT_BUNDLE` round-trip between two test admins once Navigator's
+Edge endpoint lands, then reused for IBD.
+```
diff --git a/documents/planning/d2-research-subject-registry.md b/documents/planning/d2-research-subject-registry.md
new file mode 100644
index 00000000..d34f147b
--- /dev/null
+++ b/documents/planning/d2-research-subject-registry.md
@@ -0,0 +1,253 @@
+# D2 — ResearchSubject Registry + Cross-Admin Identity Resolution
+
+**Status:** Design (v0, 2026-06-06). AppView roadmap §5 D2. **Builds on:** D1
+(`d1-encrypted-edge-exchange.md`) for the encrypted channel; **reuses** the IBD
+resolver (D3) for genetic same-person signals; **feeds** D4 (assertions) / D5
+(group projects). **Cross-repo:** AppView registry + Navigator local mapping.
+
+> **Corrects the earlier sketch.** The Navigator FTDNA design (§8.3) proposed AppView
+> storing **salted `id_hashes[]`** of kit numbers (`HMAC(project_salt, kit#)`) as the
+> deterministic match key. **That does not survive scrutiny** (§4): kit numbers are a
+> small, enumerable space, and any salt AppView can see (or extract from a client) lets
+> it brute-force every hash back to the kit#. D2 replaces it: **AppView stores no
+> identifiers or hashes at all**; exact matching happens **Edge-to-Edge over D1**.
+
+## 1. Purpose
+
+Give the collaboration layer a **vendor-neutral, PII-free "person" node** that
+co-admins can attach assertions to and resolve across each other's imports — without
+AppView ever learning a name, a kit number, or even a hash of one.
+
+## 2. What a ResearchSubject is (and is not)
+
+A **ResearchSubject** is a pseudonymous handle for "a person under research in a
+project context." At AppView it is **almost empty**:
+
+```
+research_subject_id : UUID -- random; the ONLY cross-admin handle
+custody_did : DID | null -- null = admin-stewarded; set when the member claims it
+ -- (no names, no kit#, no MDKA, no hashes)
+```
+
+It **is not**:
+- `core.biosample` — that is a *federated, anonymized sample* from `fed.*` ingest. A
+ ResearchSubject **may** point at one (if the person published anonymized data) but
+ usually **does not**: the common bootstrap case is an FTDNA member who is **not on
+ platform**, whose clear-text identity lives only in the importing admin's local
+ store. ResearchSubject is the sparser, person-level node.
+- A PII record. Names/MDKA/kit# never appear here or anywhere server-side.
+
+## 3. The three-layer identity picture
+
+```
+ LOCAL (each admin's Navigator, clear-text, PII) SHARED (AppView, pseudonymous)
+ ┌───────────────────────────────────────┐ ┌──────────────────────────┐
+ │ biosample.guid (local Subject) │ maps to │ research_subject_id (UUID)│
+ │ external_id(source,id) e.g. FTDNA #128753 │◀────────▶│ + project membership │
+ │ ftdna_member / mdka (names, ancestors)│ (local │ + custody_did │
+ └───────────────────────────────────────┘ table) │ + current_view (non-PII)│
+ ▲ exact match via D1 channel ▲ └──────────────────────────┘
+ │ (co-admins exchange id lists, consented) ▲
+ └──────── genetic match via IBD/D3 ──────────────────┘
+ (optional) → core.biosample
+ if the person federates data
+```
+
+- **Local (Navigator):** clear-text identity — `biosample.guid`, `external_id`
+ (kit#), `ftdna_member`, `mdka`. Never leaves the box except as encrypted D1 payload.
+- **Shared (AppView):** the pseudonymous `research_subject_id` + project memberships
+ + custody + the non-PII `current_view`. **No identifiers.**
+- **The map between them** (`biosample.guid ↔ research_subject_id`) is held **locally
+ by each admin**; admins reconcile their maps to a common `research_subject_id`
+ through the resolution mechanisms in §4.
+
+## 4. Resolution — how two admins agree on the same ResearchSubject
+
+Three mechanisms, in precedence order. **None reveals an identifier to AppView.**
+
+### 4.1 Deterministic exact match — id-list exchange over D1 (v1)
+
+Within a **shared project**, two co-admins have already consented to collaborate
+(D1 dual-consent). Co-admins in FTDNA's GAP see *all* members of a shared project
+anyway, so **exchanging their `(source, external_id)` lists over the encrypted D1
+channel is within the consented scope** — no fancy crypto needed:
+
+1. Admin A and B establish a D1 session (`purpose=GENEALOGY_PII`).
+2. They exchange their project's `(source, external_id)` lists (encrypted).
+3. Each computes the **intersection locally**; matching kits ⇒ same person.
+4. For a match, they agree on a **shared `research_subject_id`** (lexicographically-
+ lower admin's existing id wins, or mint one) and each records the mapping locally;
+ one registers the subject + both memberships at AppView (pseudonymous).
+
+**AppView sees:** two `research_subject_id`s gained a second project membership.
+**It never sees** the kit numbers or that "128753" was the link.
+
+> **Why not AppView-side hashing?** Kit numbers are ~6–7 digit enumerable values; a
+> broker that holds `HMAC(salt, kit#)` and can obtain the salt (it ships in the
+> client) brute-forces the whole space in milliseconds. Edge-to-Edge id exchange
+> keeps the broker blind by construction. (PSI, private set intersection — §4.4 — is the
+> upgrade for the *cross-project* case where admins should learn only the intersection.)
+
+### 4.2 Genetic same-person / close-kin — IBD over D1 (reuses D3)
+
+When ids differ but the people may be the same (or close kin), run the **IBD/
+haplotype comparison Edge-to-Edge** (D3, same D1 channel) → a *suggested* merge with
+a confidence, surfaced to both admins. **Never auto-merged.** This catches duplicates
+across vendors (FTDNA kit vs. a direct-WGS sample) where no shared id exists.
+
+### 4.3 Assertion-mediated — pseudonymous `same_person` (D4)
+
+An admin publishes a `same_person(research_subject_id_A, research_subject_id_B)`
+assertion (pseudonymous ids only, no kit#). The group accepts/rejects; provenance
+retained. This is the manual override and the audit trail for 4.1/4.2 outcomes.
+
+### 4.4 Cross-project linking — member-claim only (NOT silent AppView merge)
+
+Auto-linking the *same person across projects they did not consent to be linked
+across* is **privacy-hostile** (cross-context deanonymization) and is **deliberately
+not** an AppView background job. Cross-project consolidation happens only when the
+**member themselves claims** their subjects (§6) and chooses to merge them. PSI
+(§4.1 note) is the future tool that would let two *non-co-admins* discover a shared
+member with consent — out of scope for v1.
+
+## 5. AppView schema (`research.*`, PII-free)
+
+```sql
+CREATE SCHEMA research;
+
+CREATE TABLE research.research_subject (
+ research_subject_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+ custody_did TEXT, -- null = admin-stewarded; set on claim
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+); -- deliberately no ids/names/hashes
+
+CREATE TABLE research.subject_membership (
+ research_subject_id UUID NOT NULL REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE,
+ project_id BIGINT NOT NULL, -- group-project (D5)
+ steward_did TEXT NOT NULL, -- the admin who holds the local clear-text identity
+ added_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+ PRIMARY KEY (research_subject_id, project_id)
+);
+CREATE INDEX subject_membership_project_idx ON research.subject_membership(project_id);
+
+CREATE TABLE research.subject_link ( -- audit of 4.1/4.2/4.3 merges (pseudonymous)
+ id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+ subject_a UUID NOT NULL, subject_b UUID NOT NULL, -- merged ids (a kept, b retired)
+ method TEXT NOT NULL, -- ID_EXCHANGE | GENETIC | ASSERTION | CLAIM
+ asserted_by_did TEXT NOT NULL, confidence DOUBLE PRECISION, created_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- optional, sparse: link to a federated sample IF the person published anonymized data
+CREATE TABLE research.subject_biosample (
+ research_subject_id UUID REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE,
+ sample_guid UUID REFERENCES core.biosample(sample_guid),
+ PRIMARY KEY (research_subject_id, sample_guid)
+);
+```
+
+`current_view` (the materialized, non-PII per-subject summary — branch assignments,
+pseudonymous links, aggregate stats) is produced by the **assertion store (D4)**, not
+D2; D2 only owns the registry + memberships + link audit.
+
+**Invariant:** every column above is pseudonymous. A reviewer can confirm no PII path
+exists into `research.*`.
+
+## 6. Custody & member-claim (high level; proof = open Q)
+
+- **Stewardship:** on import, each Subject is **admin-stewarded** — `custody_did =
+ null`, `subject_membership.steward_did` = the importing admin. "Steward" just means
+ "the admin whose local store holds the clear-text identity"; AppView stores no PII.
+- **Claim:** a member onboards (gets a DID/PDS), proves to a steward admin that they
+ control the kit (mechanism TBD — §10), the admin transfers custody over D1, and the
+ steward sets `custody_did = member_did` (a pseudonymous pointer flip). The member
+ then controls their own clear-text data locally and decides federation/visibility
+ (the `group-project-system.md` sovereign end-state).
+- **Proof of kit control is the open question** — AppView can't verify it (no PII).
+ Likely admin-mediated (the steward, who *does* hold the kit data, vouches) or a
+ vendor credential the member presents to the admin. Specify in D5/claim follow-up.
+
+## 7. Navigator-side local model
+
+Each admin's Navigator gains the local map (clear-text side stays in SQLite):
+
+```sql
+-- Navigator local (migration alongside FTDNA import 0014–0016)
+CREATE TABLE subject_shared_id (
+ biosample_guid TEXT PRIMARY KEY REFERENCES biosample(guid),
+ research_subject_id TEXT NOT NULL, -- the AppView pseudonym
+ project_id INTEGER, -- which shared project this binding is for
+ custody TEXT NOT NULL DEFAULT 'STEWARDED' -- STEWARDED | CLAIMED_BY_ME | CLAIMED_BY_OTHER
+);
+```
+
+- On **import**, a local Subject has no shared id until the project is joined/shared.
+- On **join a shared project**, Navigator mints a `research_subject_id` per Subject (or
+ links existing via §4.1 id-exchange), registers pseudonymous nodes + memberships at
+ AppView, and stores the binding locally.
+- The `external_id`↔`research_subject_id` correspondence is **only** local + exchanged
+ over D1; never sent to AppView.
+
+## 8. End-to-end: two admins, one shared project
+
+```
+Admin A imports FTDNA project P (local Subjects, kit#s, MDKA — all local)
+Admin B is invited as co-admin of P ── D1 dual-consent ──▶ session
+A ⇄ B exchange (source,id) lists over D1 (encrypted) [§4.1]
+ → local intersection: kits 128753, 145002 match
+A & B agree shared research_subject_id for each match; mint new for the rest
+ register pseudonymous subjects + memberships at AppView (no ids)
+A ⇄ B exchange SUBJECT_BUNDLE (names, MDKA) over D1 for shared subjects [D1/§7]
+ → each folds PII into its own local store
+later genetic suggestion (D3) flags kit 9001 (A) ≈ sample S (B) → suggested merge
+ → B accepts via same_person assertion (D4); subject_link audited
+member M onboards, proves control of 128753 to steward A → custody flips to M_did [§6]
+```
+
+AppView's whole view of this: a few pseudonymous `research_subject` rows gained
+memberships and links. No names, no kits, no MDKA — ever.
+
+## 9. Threat model / what AppView learns
+
+- **Pseudonymous social graph:** which `research_subject_id`s belong to which projects
+ and which DIDs steward them — the same membership graph it needs for D5 ACLs, and no
+ worse than GAP's "admins know their project's members." No identifiers.
+- **No identifier exposure:** kit#/name/MDKA never reach AppView; exact matching is
+ Edge-to-Edge (§4.1); the broker can't brute-force what it never stores.
+- **Steward de-anonymization risk:** `steward_did` links a person-node to a real admin
+ DID. That's inherent (someone must hold the data) and bounded — it reveals
+ *custodianship*, not identity. Mitigation: stewardship is per-project, and claim
+ (§6) lets the member take over.
+- **Malicious admin** can mint bogus subjects or wrong links — bounded by the
+ assertion/provenance layer (D4): links are attributed and disputable.
+
+## 10. Open questions / decisions
+
+1. ~~Deterministic mechanism~~ **DECIDED: id-list exchange over D1** within a shared
+ project (§4.1, GAP-equivalent); AppView stores no ids/hashes. PSI deferred for the
+ cross-project case.
+2. ~~Cross-project linking policy~~ **DECIDED: member-claim only** (§4.4); no silent
+ AppView cross-context merge.
+3. **Proof of kit control** for member-claim (§6) — admin-vouch vs. vendor credential
+ vs. a challenge the member completes. Blocks the claim flow, not the registry.
+4. **`research.*` vs. fold into group-project schema (D5)** — separate schema
+ (recommended for the PII-free invariant clarity) vs. co-locate with projects.
+5. **Merge mechanics** — keep-lower-id vs. mint-new on merge; how `subject_link`
+ retirement cascades to memberships/assertions. Define with D4.
+
+## 11. Next step
+
+§10 Q1–Q2 decided (id-exchange-over-D1, member-claim-only). **AppView registry BUILT
+(2026-06-12):** `research.*` schema (mig 0033 — research_subject [+`retired_into`
+tombstone], subject_membership → `social.group_project`, subject_link audit,
+subject_biosample) + `du_db::research` (register_in_project, **tombstone** merge_subjects
+[repoint + audit, no delete], set_custody, link_biosample, authz readers) + `du-web`
+`/api/v1/research/*` endpoints, **signature-authenticated** (`crate::sig::verify_signed`,
+shared with D1) **and authorized** from existing data (register → project owner; merge →
+steward of both; custody → subject's steward; read → project participant). PII-free
+invariant holds. Memory `research-subject-registry`.
+
+**Remaining (Navigator / D4 / D5):** the Navigator `subject_shared_id` local map + the
+D1 id-exchange join-flow that *populates* the registry (DUNavigator); proof-of-kit-control
+for member-claim (§10 Q3); `current_view` + `same_person` assertions (D4); full
+project-admin ACL beyond owner-gating (D5). §4.2's genetic resolver is the D3 candidate
+engine (already built), sharing D1's channel.
diff --git a/documents/planning/d3-ibd-matching-impl.md b/documents/planning/d3-ibd-matching-impl.md
new file mode 100644
index 00000000..70dbb954
--- /dev/null
+++ b/documents/planning/d3-ibd-matching-impl.md
@@ -0,0 +1,297 @@
+# D3 — IBD Matching: Rust Implementation Spec
+
+**Status:** Design (v0.1, 2026-06-12 — added §3.0/3d/3e scope control: ancestry
+blocking + matches-of-matches graph expansion + query-vs-panel cold start, so
+candidate generation is never N:N and the AppView emits only bounded top-K lists;
+flagged the D1-independent first slice). AppView roadmap §5 D3. **Implements** the
+original IBD requirements **on top of D1**
+(`d1-encrypted-edge-exchange.md`) and the actual Rust schema; **supplies** D2's
+genetic resolver (`d2-research-subject-registry.md` §4.2). This doc now carries the
+requirements (the standalone planning doc was removed as superseded) and the build
+spec. **Cross-repo:** AppView coordinator +
+Navigator Edge analysis (`navigator-analysis/src/ibd.rs` already exists).
+
+## 1. What changed since the planning doc (the refresh)
+
+The original doc (Scala/Tapir era) invented its own crypto, key exchange, and P2P
+channel. Three things change:
+
+1. **The channel is D1, not bespoke.** IBD is now just a **consumer of the
+ `exchange.*` substrate** with `purpose ∈ {IBD_AUTOSOMAL, IBD_X, IBD_Y, IBD_MT}`.
+ Drop the IBD-specific ECDH/relay invention (D1 owns it). The planning doc's
+ `ibd.match_request`/`match_consent` **fold into `exchange.exchange_request`/
+ `exchange_consent`** (D1 §8); IBD-specific tables (`ibd_discovery_index`,
+ `ibd_pds_attestation`, `match_suggestion`) stay.
+2. **Rust, not Scala.** The four service traits become `du-db` query modules +
+ `du-web` axum handlers + a `du-jobs` discovery job. Concrete SQL below.
+3. **AppView mines candidates from `fed.*` anonymized aggregates only.** It never
+ touches raw genotypes; the actual IBD segment detection is **Edge-to-Edge** over
+ D1. (Unchanged in spirit from the doc's security section; made concrete here.)
+
+## 2. Architecture (one line each)
+
+- **AppView (coordinator, PII/genotype-free):** mine candidate pairs from `fed.*`
+ → `match_suggestion`; broker request + **dual-consent** (via `exchange.*`/D1);
+ notify both Edges "match-ready"; verify + index **attestations** (match *summaries*
+ only); serve match-list API. Never sees a genotype.
+- **Edge (Navigator, holds genotypes):** establish the D1 session; exchange encrypted
+ variant positions / segment boundaries; run the IBD algorithm
+ (`navigator-analysis::ibd`); cross-verify; sign + attest; classify relationship.
+
+## 3. Candidate mining (the discovery engine) — `du-jobs` + `du-db::ibd`
+
+A scheduled `du-jobs` job (`ibd-discovery-recompute`, alongside the existing
+`branch-age-recompute`), incremental per sample. Three signals → `ibd.match_suggestion`
+rows (existing table: `target_sample_guid`, `suggested_sample_guid`, `suggestion_type`,
+`score`, `status`), ranked. All inputs are **anonymized `fed.*` / `ibd.*` aggregates**.
+
+### 3.0. Scope control — block, don't pair (the load-bearing principle)
+
+The AppView must **never materialize an N×N pair list**, and must **never hand a
+Navigator client "everyone."** Each sample gets a **bounded, ranked, top-K candidate
+list**; the Edge then runs IBD (or a query-vs-panel search) only against that K — so
+each client is O(K), not O(N). Two cheap mechanisms keep candidate generation
+near-linear (this is record-linkage *blocking* + graph expansion, not all-pairs
+scoring):
+
+- **Block by ancestry before scoring.** Bucket samples by a cheap key and only score
+ *within* buckets: a coarse block on the continental rollup
+ (`fed.population_breakdown.super_population_summary`) drops cross-continental pairs
+ outright, and a finer block on the published **PCA coordinates**
+ (`fed.population_breakdown.pca_coordinates` — grid-bin or LSH the PCA space) restricts
+ the overlap computation to near neighbours. O(N²) → ~O(N·k).
+- **Expand the match graph (matches-of-matches) as the steady state.** Once a sample
+ has any confirmed edge in `ibd_discovery_index`, its best new candidates are its
+ **2-hop neighbourhood** — cheap graph traversal, not pairing. Ancestry-blocking is
+ only the **cold-start seeder**; graph expansion is the primary generator thereafter.
+
+Cap to **top-K per sample** by combined score (`expires_at` ages out the rest). The
+existing `ibd.population_overlap_score` table is therefore populated **only for
+within-block pairs, incrementally as samples arrive** — never the full N². Research
+backing: §3e.
+
+### 3a. Haplogroup match (cheapest; gates the rest for Y/MT)
+Same terminal Y or mt haplogroup ⇒ candidate patriline/matriline match.
+```sql
+-- suggestion_type = 'HAPLOGROUP'; region from which haplotype matched
+INSERT INTO ibd.match_suggestion (target_sample_guid, suggested_sample_guid, suggestion_type, score, metadata)
+SELECT a.sample_guid, b.sample_guid, 'HAPLOGROUP',
+ depth_score(a.haplogroup, b.haplogroup), -- deeper shared terminal = higher
+ jsonb_build_object('region', 'Y', 'haplogroup', a.haplogroup)
+FROM fed.haplogroup_reconciliation a
+JOIN fed.haplogroup_reconciliation b
+ ON a.dna_type = b.dna_type AND a.haplogroup = b.haplogroup AND a.sample_guid < b.sample_guid
+WHERE a.dna_type = 'Y'; -- and again for 'Mt'
+```
+
+### 3b. Population overlap (autosomal candidate gate)
+`Σ min(A[pop], B[pop])` over `ibd.population_breakdown`; **cached** in
+`ibd.population_overlap_score`. Never compute the full N²: the `gated_pairs` set is the
+**ancestry block** from §3.0 — same `super_population_summary` bucket **and** the same
+PCA grid/LSH cell (`fed.population_breakdown.pca_coordinates`), plus the haplogroup
+bucket for Y/mt-line requests. Score only those within-block pairs; persist
+incrementally as samples join.
+```sql
+-- overlap from the cached breakdown JSONB; only for pre-gated pairs
+WITH pair_overlap AS (
+ SELECT s1, s2, SUM(LEAST(p1.frac, p2.frac)) AS score
+ FROM gated_pairs g
+ JOIN ibd.population_breakdown_cache c1 ON c1.sample_guid = g.s1, jsonb_each_text(c1.breakdown) p1(pop, frac_t)
+ JOIN ibd.population_breakdown_cache c2 ON c2.sample_guid = g.s2, jsonb_each_text(c2.breakdown) p2(pop, frac_t)
+ WHERE p1.pop = p2.pop -- (frac cast to double)
+ GROUP BY s1, s2)
+INSERT INTO ibd.match_suggestion (...) SELECT s1, s2, 'POPULATION_OVERLAP', score, ...
+FROM pair_overlap WHERE score >= :min_overlap; -- default 0.6
+```
+
+### 3c. Shared-match (the **primary** generator once the graph is seeded)
+The "in-common-with" / shared-match principle (the basis of every consumer clustering
+tool — the Leeds Method, AutoClusters): samples that match the same third parties share
+common ancestors. This is **2-hop graph expansion** over `ibd_discovery_index`, not
+all-pairs scoring — cheap and high-yield, so it is the steady-state generator (§3.0).
+```sql
+-- over confirmed matches in ibd_discovery_index (the match graph)
+SELECT a.other AS s1, b.other AS s2, COUNT(*) AS shared
+FROM matches_of a JOIN matches_of b ON a.match = b.match AND a.other < b.other
+GROUP BY a.other, b.other HAVING COUNT(*) >= :min_shared -- default 2
+-- → suggestion_type = 'SHARED_MATCH', score = shared count
+```
+(`matches_of` = a view unnesting `ibd_discovery_index` into (sample, matched-sample).)
+**Endogamy caveat:** pedigree collapse / endogamous ancestries smear clusters together
+(everyone shares everyone), inflating false candidates. Detect via PCA-cell density /
+ancestry tag and **cap + down-weight** `SHARED_MATCH` there (and prefer larger
+`min_shared`).
+
+### 3d. Cold start = query-vs-panel, not panel-vs-panel
+A brand-new sample has no graph edges to expand (§3c) — seed it from the **ancestry
+block** (§3.0/3b) only. Critically, the Edge then does a **one-vs-many query against
+that block as a panel**, not an N:N comparison (RaPID-Query-class search: a single
+query against a biobank-scale panel in seconds, error-tolerant). The AppView's job is
+to **supply the right panel subset** (the block) — never an all-pairs list. After the
+first few confirmed matches land, the sample switches to graph expansion.
+
+**Ranking & lifecycle:** combine the three scores (weighted), dedupe per pair, **cap to
+top-K per target**, expire stale suggestions (`status` ACTIVE/DISMISSED/EXPIRED/
+CONVERTED, `expires_at`). `du-db::ibd::suggestions_for(sample|did, limit)` serves them
+ranked. The AppView emits only this bounded list — the no-N:N guarantee (§3.0).
+
+### 3e. Research backing
+- **Don't conflate detection with selection.** Genotype-level all-pairs IBD *detection*
+ (PBWT family: RaPID, hap-IBD, 23andMe's TPBWT, the newer kL-SMEM/PBML work) is
+ the **Edge's** job — the AppView holds no genotypes. The AppView solves *candidate
+ selection* (metadata blocking + graph expansion).
+- **Query-vs-panel** ([RaPID-Query](https://pmc.ncbi.nlm.nih.gov/articles/PMC10244210/),
+ [L-PBWT-Query](https://pmc.ncbi.nlm.nih.gov/articles/PMC6612857/)) is the Edge-side
+ one-vs-many that makes a new joiner O(panel-query), not O(N²) — the basis of §3d.
+- **Ancestry/PCA blocking** is standard record-linkage blocking (Christen, *Data
+ Matching*) — the basis of §3.0/3b.
+- **Shared-match clustering** ([Leeds Method](https://www.pricegen.com/dna-shared-matches-and-clustering/),
+ [AutoClusters](https://www.gedmatch.com/blog/what-are-dna-autoclusters/)) is the basis
+ of §3c, including the documented [endogamy failure mode](https://dna-explained.com/2025/07/10/how-to-use-ancestrys-new-match-clusters-and-what-they-mean/).
+
+## 4. Request + dual-consent (on `exchange.*` / D1)
+
+The planning doc's Phase 2 maps directly onto D1's request→consent gate; IBD adds only
+the `purpose` and the discovery reason:
+
+1. Requester writes a signed `exchange_request` PDS record (`purpose=IBD_*`,
+ `details = {requesterSampleUri, discoveryReason, regionType}`) → AppView mirrors it
+ (`exchange.exchange_request`), notifies target.
+2. Both parties sign `exchange_consent`; AppView **verifies both signatures** (the
+ dual-consent gate, D1 Invariant 2) and flips the request to `CONSENTED`.
+3. AppView emits **exchange-ready** to both Edges (D1 §5 step 4) with `partnerDid` +
+ `partnerExchangeKeyUri`.
+
+`du-db::ibd` (or `du-db::exchange`): `create_request`, `record_consent`,
+`mutual_consent(request_uri)`, `pending_for(did)`. `du-web` routes under
+`/api/v1/exchange/*` (shared) with IBD-specific discovery context.
+
+## 5. Edge handoff = a D1 session (the only IBD-specific Edge logic)
+
+Once exchange-ready, Phase 3 *is* a D1 session — no bespoke channel:
+
+1. D1 ECDH session (`purpose=IBD_Y` etc.), per D1 §4–5.
+2. Exchange `payload_type ∈ {VARIANT_POSITIONS, SEGMENT_BOUNDARIES}` (D1 §7) over the
+ blind relay.
+3. **Both Edges run the IBD algorithm locally** (`navigator-analysis::ibd`):
+ - **Autosomal/X:** IBD *segment* detection over shared positions → `{totalSharedCm,
+ numSegments, largestSegmentCm}`.
+ - **Y:** STR genetic distance + terminal-SNP concordance (patriline TMRCA estimate).
+ - **MT:** HVR/coding mutation distance (matriline).
+4. **Cross-verify:** both hash the canonical summary (SHA-256); matching hashes confirm
+ a valid, agreed result (D1 §5 step 7a; planning doc Phase 3.3).
+5. **Attest:** each signs the summary with its Ed25519 PDS key, writes an attestation
+ record to its PDS.
+
+The IBD algorithm itself is **Edge analysis** (Navigator), out of scope for AppView;
+`navigator-analysis/src/ibd.rs` is its home. AppView only ever sees the *summary*.
+
+## 6. Attestation indexing (`du-jobs` Jetstream + `du-db::ibd`)
+
+AppView's Jetstream consumer already ingests `fed.*`; add the IBD attestation
+collection. On both attestations for a request:
+- `verify_attestations`: both Ed25519 signatures valid (`du-atproto::signature`) **and**
+ `matchSummaryHash == partnerSummaryHash` (the two Edges agreed).
+- Index `ibd.ibd_discovery_index` (pair, `match_region_type`, `total_shared_cm_approx`,
+ `num_shared_segments_approx`, `consensus_status`) + two `ibd.ibd_pds_attestation`
+ rows. Mark the `match_suggestion` `CONVERTED`.
+- `update_consensus_status`: INITIAL_REPORT → CONFIRMED on matching dual attestation;
+ DISPUTE on mismatch.
+
+**Only summaries are indexed** — never positions, never genotypes (planning doc
+Security; D1 Invariant 1).
+
+## 7. Relationship classification → feeds D2's resolver
+
+The IBD summary is classified into a relationship band (standard autosomal cM ranges),
+Edge-side, and the band drives **both** the match UI **and** D2:
+
+| Band | ~Shared | Action |
+| --- | --- | --- |
+| **Same person / identical** | ~full genome (autosomal) · Y+mt identical | **→ D2 §4.2 merge suggestion** (`subject_link method=GENETIC`), never auto |
+| Parent/child, full sib | ~3400 / ~2550 cM | close-kin match; surface prominently |
+| 2nd–4th cousin … | banded by cM | normal match list |
+| Y-only / MT-only | patriline/matriline | lineage match (no autosomal claim) |
+
+So D3 *is* D2's genetic resolver: a confirmed **same-person** (or near-identical)
+classification emits a pseudonymous merge suggestion into the ResearchSubject layer
+(`research.subject_link`, method `GENETIC`, with the confidence), which the group
+accepts via a `same_person` assertion (D4). Close-kin bands stay in the match list,
+not the subject-merge path.
+
+## 8. API surface (`du-web`, axum + utoipa)
+
+```
+GET /api/v1/ibd/suggestions?limit= -> ranked match_suggestion[] (auth: owner DID)
+POST /api/v1/ibd/suggestions/:id/dismiss
+POST /api/v1/exchange/requests -> create (purpose=IBD_*) (shared w/ D1)
+GET /api/v1/exchange/requests/pending
+POST /api/v1/exchange/consent
+POST /api/v1/ibd/attestation -> Edge submits signed summary (planning doc §4.4)
+GET /api/v1/ibd/matches?sample= -> confirmed matches (summaries only)
+GET /api/v1/ibd/matches/:a/:b
+```
+DTOs: `MatchSuggestionDto`, `MatchDto {totalSharedCm, numSegments, largestSegmentCm,
+regionType, consensusStatus}`, `AttestationSubmission`. No genotype/position DTOs exist
+by construction.
+
+## 9. Schema deltas
+
+- `ibd.*` (mig 0007) mostly stands: `ibd_discovery_index`, `ibd_pds_attestation`,
+ `match_suggestion`, `population_*`, `validation_service` — keep.
+- **Generalize** `ibd.match_request`/`ibd.match_consent` → `exchange.exchange_request`/
+ `exchange_consent` (D1 §8) via a migration; an `ibd` *view* over `purpose='IBD_*'`
+ preserves call sites if useful.
+- Add the **IBD attestation Jetstream collection** to the consumer's
+ `INGEST_COLLECTIONS`.
+- `match_suggestion` already has `metadata JSONB` for discovery reason — no change.
+
+## 10. Module placement
+
+- **AppView:** `du-db::ibd` (mining SQL, suggestions, match indexing, attestation
+ verify), `du-web::routes::ibd` (+ shared `exchange` routes), `du-jobs`
+ `ibd-discovery-recompute` job + attestation ingest in the Jetstream consumer.
+- **Navigator (Edge):** `navigator-analysis::ibd` (the segment/distance algorithms),
+ `navigator-sync` (D1 session driver + attestation publish), reusing D1's
+ `du-exchange`.
+- **Shared:** `du-domain` for the relationship-band thresholds + canonical
+ summary-hash format (so Edge and AppView agree on what's signed).
+
+## 11. Privacy invariants (restate, they're load-bearing)
+
+- AppView mines candidates from **anonymized `fed.*` aggregates** only (haplogroups,
+ population breakdowns, the match graph). No raw genotype ever reaches it.
+- Edge-to-Edge exchange carries positions/segments **encrypted via D1**; AppView sees
+ only signed **summaries** (cM, segment counts).
+- Same-person merge suggestions to D2 are **pseudonymous** (`research_subject_id`s),
+ never carrying an identifier.
+
+## 12. Open questions / decisions
+
+1. **IBD algorithm provenance** — does `navigator-analysis::ibd` implement segment
+ detection from scratch, or wrap a known method? (Affects Edge effort, not AppView.)
+2. **Phasing requirement** — autosomal IBD wants phased haplotypes; do we require
+ phasing on the Edge, or do unphased segment detection (lower precision)?
+3. **Shared-match cold start** — 3c needs an existing match graph; bootstrap from
+ 3a/3b only until the graph fills. Confirm acceptable.
+4. **Population-overlap N² control** — the pre-gate (3b) must keep pair counts sane;
+ define the gate (haplogroup/region bucket) precisely.
+5. **`match_request`→`exchange_request` migration timing** — do it with D1's schema or
+ lazily. Recommend with D1 (one migration).
+
+## 13. Next step
+
+D3 closes the **Match track** (D1→D3). **D1-independent first slice — BUILT
+(2026-06-12):** candidate generation (§3) needs *no* exchange channel — `du_db::ibd::
+recompute_suggestions` reads `fed.population_breakdown` (PCA coords + components) +
+`fed.haplogroup_reconciliation` + the `ibd_discovery_index` graph and writes ranked
+`ibd.match_suggestion` rows. Ancestry blocking (§3.0: dominant super-pop × z-scored
+PCA cell), shared-haplogroup + 2-hop shared-match (§3c) signals, weighted-combine +
+**top-K cap** (§3.0 no-N:N), declarative recompute preserving DISMISSED/CONVERTED.
+`du-jobs run-once ibd-discovery-recompute` + daily; `suggestions_for` reader; advisory-
+locked. **Engine-only — no public API** (candidate pairs gate on consent). Test
+`du-db/tests/ibd_suggestions.rs` (blocking + signals + top-K + idempotency + dismiss).
+The exchange/consent reuse of D1 + the attestation-ingest/index path layer on once D1's
+channel exists. Then the **Platform
+track** continues at **D4 (assertion store, split rails)**, which consumes D3's
+same-person output (§7) and D1's PII channel.
diff --git a/documents/planning/d4-assertion-store.md b/documents/planning/d4-assertion-store.md
new file mode 100644
index 00000000..645fc780
--- /dev/null
+++ b/documents/planning/d4-assertion-store.md
@@ -0,0 +1,209 @@
+# D4 — Assertion Store (Split Rails)
+
+**Status:** Design (v0, 2026-06-06). AppView roadmap §5 D4 — the **collaboration
+primitive**. **Uses** D1 (PII channel), D2 (`research_subject` registry), **consumes**
+D3 (same-person → assertion); **feeds** D5 (group-project ACL/UI). **Cross-repo:**
+AppView non-PII store + Navigator local store (all PII).
+
+## 1. Purpose
+
+Co-admin research is modeled as **attributed, scoped, append-only assertions** over a
+`research_subject_id`, not as direct mutation of shared rows. One shape carries
+branch assignments, same-person links, haplogroup labels, MDKA, and notes — and the
+**PII-ness of each assertion decides whether it can ever touch a server**.
+
+## 2. The assertion shape
+
+```
+Assertion {
+ id,
+ subject: research_subject_id, -- pseudonymous (D2)
+ predicate: SAME_PERSON_AS | BELONGS_TO_BRANCH | HAPLOGROUP_IS | MDKA_IS | NOTE | ...,
+ value: <predicate-specific payload, see §3>,
+ author_did, -- attribution
+ scope: PUBLIC | PROJECT(<project_id>) | LOCAL, -- visibility/consent boundary
+ evidence: optional (STR distance, SNP, IBD summary ref, doc citation),
+ created_at,
+ supersedes: Assertion.id | null, -- append-only edit chain
+ retracted_at: ts | null,
+}
+```
+
+Append-only + `supersedes`/`retracted_at` gives **conflict-with-provenance**: two
+admins disagree → two live assertions, both attributed; nothing is silently
+overwritten (§6).
+
+## 3. Predicate catalog — PII class drives the rail
+
+| Predicate | `value` | PII? | Rail (§4) |
+| --- | --- | --- | --- |
+| `SAME_PERSON_AS` | `{other_subject_id, confidence, method}` | no (pseudonymous ids) | non-PII |
+| `BELONGS_TO_BRANCH` | `{clade_path / haplogroup_node}` | no | non-PII |
+| `HAPLOGROUP_IS` | `{dna_type, haplogroup, status}` | no (a classification, not an identifier) | non-PII |
+| `MDKA_IS` | `{lineage, ancestor_name, dates, place, lat/long}` | **YES** (names/places) | **PII → P2P only** |
+| `IDENTITY` | `{member_name, external_ids[]}` | **YES** | **PII → P2P only** |
+| `NOTE` | `{text}` | **maybe** (free text) | **PII rail by default**; non-PII only if author marks "no PII" |
+
+**Rule:** predicate PII-class is the *default*; free-text (`NOTE`) defaults to the PII
+rail unless the author explicitly clears it. A value-level scrubber can flag obvious
+PII (emails/names) and force the PII rail regardless (mirrors the FTDNA `Note`-column
+lesson — free text can't be auto-cleaned, so it's PII until proven otherwise).
+
+## 4. The three rails (PII-ness × scope)
+
+```
+ │ scope=PUBLIC │ scope=PROJECT(id) │ scope=LOCAL
+ ───────────────┼─────────────────────────┼────────────────────────────┼──────────────
+ non-PII │ R1: PDS public record │ R2: AppView project store │ local only
+ │ → du-jobs ingest │ (current_view, D5 ACL) │
+ ───────────────┼─────────────────────────┼────────────────────────────┼──────────────
+ PII │ ✗ FORBIDDEN │ R3: D1 encrypted P2P only │ local only
+ │ (consent can't make │ (folded LOCALLY, never │
+ │ PII public here) │ on AppView) │
+```
+
+- **R1 — PDS public record (non-PII, public):** e.g. `HAPLOGROUP_IS` when the member
+ consents to public. A signed `com.decodingus.research.assertion` record in the
+ author's PDS → ingested by du-jobs into the AppView store, same path as `fed.*`.
+- **R2 — AppView project store (non-PII, project-scoped):** e.g. `BELONGS_TO_BRANCH`
+ within a project. Held in `research.assertion` (it's **not PII**), served only to the
+ project's admin team (D5 ACL). This is consistent with "no PII in AppView" — these
+ rows carry **no identifiers**.
+- **R3 — D1 P2P (PII):** `MDKA_IS`, `IDENTITY`, PII `NOTE`. Travels as a D1
+ `PII_ASSERTION` payload (D1 §7), folded into each recipient admin's **local** store.
+ **Never** a PDS record, **never** an AppView row.
+
+**PII can never be public (R1 cell is ✗):** even with member consent, MDKA/names don't
+go to a world-readable record — consent raises visibility to the project circle (R3),
+not the world. (If a member truly wants their own ancestor public, that's their PDS
+choice post-claim, outside this layer.)
+
+## 5. Consent-flag enforcement (roadmap Q4)
+
+The FTDNA roster's `publicly_shares` (per member, on `ftdna_member`) and `access_granted`
+set the **maximum scope** an admin's Navigator may assign to assertions about that
+subject:
+- `publicly_shares = NO` → assertions about that subject are capped at `PROJECT`
+ (R2/R3); the client refuses to emit a `PUBLIC` (R1) assertion.
+- Default everything to `PROJECT` scope; `PUBLIC` requires an explicit, consent-backed
+ opt-in.
+Enforced **Navigator-side at emit time** (the producer), and re-checked at the AppView
+ingest boundary for R1 (reject public assertions about a subject flagged non-public —
+though AppView only knows the pseudonym, so this is primarily a client-side guarantee).
+
+## 6. current_view — fold with conflict-and-provenance
+
+AppView materializes a **per-(subject, predicate) `current_view`** from the **live**
+(non-retracted, non-superseded) **non-PII** assertions (R1+R2). PII (R3) is folded the
+same way but **locally in each Navigator**, never centrally.
+
+- **Single-valued predicates** (`HAPLOGROUP_IS`, `BELONGS_TO_BRANCH`): if one live
+ assertion → settled; if ≥2 disagree → **`DISPUTED`**, surfacing all claims with
+ `author_did` + `created_at` + `evidence`. The group resolves by an admin issuing a
+ superseding assertion (or a `RESOLVES` meta-assertion) — never auto-collapsed.
+- **Set-valued** (`NOTE`, multiple `SAME_PERSON_AS`): all live members shown.
+- **`SAME_PERSON_AS`** additionally **drives a D2 merge**: an accepted same-person
+ assertion writes `research.subject_link` (method `ASSERTION`) and merges the two
+ subjects' views (D2 §5). D3's genetic same-person (D3 §7) arrives as a pre-filled
+ `SAME_PERSON_AS` with `method=GENETIC` + IBD evidence, awaiting group accept.
+
+Materialization runs on ingest (R1/R2) like the existing `fed.*` reporting fold;
+`du-db::research::refold(subject_id)` after each new assertion.
+
+## 7. Branch/clade assertions vs. the curated tree (roadmap Q3)
+
+`BELONGS_TO_BRANCH` assertions are a **project's** view of where its subjects sit — they
+are **not catalog truth**. They are surfaced **against** the curated AppView haplotree
+(`tree.*`), never merged into it. A project's clade tree = the fold of its
+`BELONGS_TO_BRANCH` assertions, rendered alongside (and reconcilable with) the
+authoritative tree. Promotion of a project finding into the catalog goes through the
+existing **curator change-set** path (`tree.change_set`), not silently.
+
+## 8. Retraction & supersede
+
+- **Supersede:** an edit is a new assertion with `supersedes = old.id`; the chain head
+ is "live." Preserves full history + attribution.
+- **Retract:** `retracted_at` set; drops out of `current_view` but stays for audit.
+- **PII (R3):** retraction is a P2P `PII_ASSERTION` with a `retract` op; recipients drop
+ it from their **local** fold. (No central enforcement possible — by design; the
+ recipient already had the plaintext, exactly as in any E2E system.)
+
+## 9. Schema
+
+**AppView (`research.*`, non-PII only — R1/R2):**
+```sql
+CREATE TABLE research.assertion (
+ id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+ subject_id UUID NOT NULL REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE,
+ predicate TEXT NOT NULL, -- SAME_PERSON_AS | BELONGS_TO_BRANCH | HAPLOGROUP_IS | NOTE(non-PII)
+ value JSONB NOT NULL,
+ author_did TEXT NOT NULL,
+ scope TEXT NOT NULL, -- PUBLIC | PROJECT:<project_id>
+ evidence JSONB,
+ record_uri TEXT, -- at:// of the PDS record (R1) if any
+ supersedes_id BIGINT REFERENCES research.assertion(id),
+ retracted_at TIMESTAMPTZ,
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+CREATE INDEX assertion_subject_pred_idx ON research.assertion (subject_id, predicate) WHERE retracted_at IS NULL;
+
+CREATE TABLE research.subject_current_view ( -- materialized fold (non-PII)
+ subject_id UUID NOT NULL, predicate TEXT NOT NULL,
+ state TEXT NOT NULL, -- SETTLED | DISPUTED
+ view JSONB NOT NULL, -- live claims + authors + evidence
+ refolded_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+ PRIMARY KEY (subject_id, predicate)
+);
+```
+**No PII column exists** in `research.*` — a reviewer can confirm `MDKA_IS`/`IDENTITY`
+have no server table.
+
+**Navigator local (all assertions incl. PII):**
+```sql
+CREATE TABLE assertion_local (
+ id INTEGER PRIMARY KEY, subject_guid TEXT, research_subject_id TEXT,
+ predicate TEXT, value TEXT, -- JSON; PII lives ONLY here + D1 payloads
+ author_did TEXT, scope TEXT, evidence TEXT,
+ supersedes INTEGER, retracted_at TEXT, created_at TEXT NOT NULL
+);
+```
+
+**Lexicon:** `com.decodingus.research.assertion` (R1 public records only). PII
+assertions have **no lexicon** — they are D1 `PII_ASSERTION` payloads (D1 §7), by
+construction never recordable.
+
+## 10. Module placement
+
+- **AppView:** `du-db::research` (assertion CRUD, `refold`, current_view, subject_link
+ on same-person), `du-web::routes::research` (project-scoped assertion API + ACL via
+ D5), du-jobs Jetstream ingest of the `research.assertion` collection (R1).
+- **Navigator:** `assertion_local` store + the local fold + the D1 driver to emit/ingest
+ `PII_ASSERTION` payloads (R3) and publish R1 records (via `PdsClient`).
+- **Shared `du-domain`:** the `Assertion` shape, predicate catalog + PII-class table,
+ and the fold rules (so Edge and AppView fold identically).
+
+## 11. Open questions / decisions
+
+1. **PII classifier strictness** — predicate-default + value scrubber (recommended).
+ Confirm `NOTE` defaults to PII (safer) vs. defaults to non-PII with an opt-in PII
+ flag. Recommend **PII-by-default for free text**.
+2. **current_view storage** — materialized table (recommended, mirrors `fed.*`
+ reporting) vs. compute-on-read. Materialize.
+3. **Dispute resolution authority** — any admin supersedes vs. owner/role-gated (D5).
+ Likely role-gated; finalize with D5.
+4. **R1 ingest consent re-check** — AppView can only see the pseudonym, so public-scope
+ enforcement is primarily client-side; accept that, or add a per-subject "public-ok"
+ pseudonymous flag the member sets on claim? Recommend client-side + claim-time flag.
+5. **Cross-project assertion leakage** — a subject in two projects: are PROJECT-scoped
+ assertions isolated per project, or visible to any project the subject is in?
+ Recommend **per-project isolation** (scope = the specific project).
+
+## 12. Next step
+
+D4 + D2 + D1 are the full **private collaboration stack**: registry (D2) + channel
+(D1) + the attributed-claim primitive (D4), with D3 feeding genetic same-person. The
+buildable slice: `research.assertion` + `refold`/current_view + the Navigator
+`assertion_local` store + D1 `PII_ASSERTION` round-trip + R1 public-record ingest —
+provable by a `BELONGS_TO_BRANCH` (R2) and an `MDKA_IS` (R3) between two test admins.
+Then **D5 (group-project reconciliation)** adds the admin-team ACL, roles, and UI that
+gate all of R2/R3 and resolve disputes.
diff --git a/documents/planning/d5-group-project-reconciliation.md b/documents/planning/d5-group-project-reconciliation.md
new file mode 100644
index 00000000..d693b249
--- /dev/null
+++ b/documents/planning/d5-group-project-reconciliation.md
@@ -0,0 +1,225 @@
+# D5 — Group-Project Reconciliation + Admin-Team ACL
+
+**Status:** Design (v0, 2026-06-06). AppView roadmap §5 D5 — closes the **Platform
+track**. **Reconciles** `proposals/group-project-system.md` (the member-sovereign
+proposal) with **D1–D4**, and supplies the **admin-team ACL** that gates D4's R2/R3,
+D1's PII introductions, and D4's dispute-resolution authority.
+
+## 1. Purpose
+
+A *project* is the unit of collaboration and the **consent/scope boundary** every
+prior doc references (`scope=project:<id>`). D5 defines what a project is, **who is in
+its trust circle**, the **roles/permissions** that gate the stack, and how the
+existing member-sovereign proposal and the FTDNA admin-stewarded bootstrap are **one
+lifecycle, not two systems**.
+
+## 2. The reconciliation — two modes on one lifecycle
+
+`group-project-system.md` assumes members are **on-platform, own a PDS, and
+self-manage visibility** (sovereign). The FTDNA bootstrap (D2/D4) assumes the studied
+people are **not on platform** — admins import and steward them. These are the **ends
+of one lifecycle**:
+
+| | **Stewarded mode** (FTDNA bootstrap) | **Sovereign mode** (the proposal's target) |
+| --- | --- | --- |
+| Studied person | pseudonymous `research_subject`, **no DID** | a member **DID**, self-present |
+| PII custody | the steward admin (local + P2P) | the member's own PDS |
+| Visibility control | admin team, **capped by the consent flag** | the member, **per-field opt-in** |
+| Governance | admin team (D5 roles) | member self-sovereignty + admin governance |
+
+A subject moves stewarded → sovereign by **member-claim** (D2 §6). A single project can
+hold **both** kinds of subject at once; D5 handles the union.
+
+## 3. Two memberships — the disentanglement the proposal needs
+
+The proposal conflates "member" (the studied person) with "participant" (a DID in the
+project). D1–D4 require these be **separate**:
+
+- **Collaborator team** = the **DIDs** who run/contribute to the project, each with a
+ **role**. This is the **trust circle / ACL / consent boundary**. D1 brokers PII
+ exchange *between these DIDs*; D4 R2 is served *to these DIDs*; disputes are resolved
+ *by these DIDs* (per role). → AppView `project_member` (D5).
+- **Subject membership** = which **`research_subject`s** (pseudonymous studied people)
+ belong to the project. → D2 `research.subject_membership` (already exists).
+
+In stewarded mode these are disjoint (admins ≠ subjects). In sovereign mode a claimed
+member is **both** a collaborator DID *and* a subject (their `custody_did` = their
+team DID). D5's ACL is over the **collaborator team**, never the subjects.
+
+## 4. Roles & permissions (adopt the proposal's, bind to the stack)
+
+Keep the proposal's `projectRole` model — `ADMIN`, `CO_ADMIN`, `MODERATOR`, `CURATOR`
+plus granular permissions (`APPROVE_MEMBERS`, `MANAGE_ROLES`, …) — and bind each to what
+it gates across D1–D4:
+
+| Capability | Min role/permission | Gates |
+| --- | --- | --- |
+| Join the PII exchange circle (D1) | any team member (`ADMIN`/`CO_ADMIN`) | D1 broker checks team membership before relaying |
+| Write R2 project assertions (D4) | `CO_ADMIN`+ (or `MANAGE_ASSERTIONS`) | D4 R2 accept |
+| Read R2 project current_view | any team member | D4 R2 serve / D5 ACL |
+| Resolve a dispute (supersede) | `ADMIN`/`CURATOR` (D4 §11.3) | D4 fold resolution |
+| Invite/approve collaborators, set roles | `ADMIN` (`MANAGE_ROLES`) | D5 membership |
+| Promote a finding to the catalog tree | `CURATOR` → existing `tree.change_set` | catalog (D4 §7) |
+
+`MODERATOR` ≈ community management (sovereign-mode member relations); `CURATOR` is the
+bridge to the existing tree-curation path.
+
+## 5. AppView project + ACL schema (PII-free)
+
+```sql
+CREATE TABLE research.project (
+ id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+ project_uri TEXT UNIQUE, -- at:// of the groupProject PDS record (owner-authored)
+ name TEXT, -- project names are not member PII
+ kind TEXT, -- SURNAME | HAPLOGROUP | GEOGRAPHIC | STUDY
+ join_policy TEXT NOT NULL, -- OPEN | APPROVAL_REQUIRED | INVITE_ONLY | HAPLOGROUP_VERIFIED
+ succession TEXT NOT NULL DEFAULT 'CO_ADMIN_INHERITS',
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+CREATE TABLE research.project_member ( -- the COLLABORATOR TEAM (the ACL)
+ project_id BIGINT NOT NULL REFERENCES research.project(id) ON DELETE CASCADE,
+ member_did TEXT NOT NULL,
+ role TEXT NOT NULL, -- ADMIN | CO_ADMIN | MODERATOR | CURATOR
+ permissions TEXT[] NOT NULL DEFAULT '{}',
+ appointed_by TEXT,
+ joined_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+ left_at TIMESTAMPTZ, -- revocation (§8)
+ PRIMARY KEY (project_id, member_did)
+);
+CREATE INDEX project_member_did_idx ON research.project_member (member_did) WHERE left_at IS NULL;
+```
+
+`research.subject_membership` (D2) holds the studied subjects. **No PII anywhere** —
+only DIDs, roles, pseudonymous subject ids, and project *names* (which are not member PII).
+
+## 6. The ACL enforces across the whole stack
+
+`project_member` (live rows, i.e. `left_at IS NULL`) is the single ACL the broker and assertion store consult:
+
+- **D1 PII exchange:** before relaying a `GENEALOGY_PII` envelope between A and B, the
+ broker checks **both are live `project_member`s** of the named project (and the
+ request `scope` matches). Non-members can't be in the circle.
+- **D4 R2 (project assertions):** read = any live team member; write/dispute = role per
+ §4. AppView enforces on the `research.assertion` API.
+- **D4 R3 visibility cap:** an admin may only emit PII (R3) to **fellow team members**;
+ the consent flag (`publicly_shares`) still caps *public* scope (D4 §5).
+- **Revocation:** `left_at` set → immediately drops from the ACL (proposal's "Revocable
+ Participation"). Caveat §8.
+
+## 7. The proposal's aggregate records = R1/R2 (don't duplicate)
+
+The proposal's `projectTreeView`, `projectModal`, `strComparison` records are
+**aggregate, non-PII** ("genetic distance not raw STR; member counts not lists") — i.e.
+they are exactly the **R1/R2 non-PII layer** of D4 plus existing AppView aggregates:
+
+- `projectTreeView` / project clade tree = the fold of `BELONGS_TO_BRANCH` assertions
+ (D4 §7), rendered against `tree.*`.
+- `projectModal` (modal haplotype) = the existing `ystr` modal aggregation, scoped to a
+ project's subjects.
+- `strComparison` (genetic distance, not raw values) = a derived R2 aggregate.
+
+So D5 **maps the proposal's records onto D4's rails** rather than adding a parallel
+record set: the proposal's "privacy-preserving research value" *is* the non-PII
+assertion/aggregate layer. Raw individual STR/PII = R3 (P2P), never these records.
+
+## 8. PII durability & succession (a real bootstrap risk)
+
+In stewarded mode, a subject's PII lives **only in the steward admin's local store**.
+If that admin vanishes, the PII is **gone** — and so is the project's research memory.
+Mitigations D5 must specify:
+
+- **Replicate stewarded PII across the consent circle.** When co-admins join, the
+ steward **P2P-exchanges (D1) the relevant `SUBJECT_BUNDLE`s** so ≥2 team members hold
+ each subject's PII. This turns the consent circle into a redundancy set (and is the
+ same exchange that enables collaboration — durability is a free side effect).
+- **Succession** (`research.project.succession`, from the proposal): `CO_ADMIN_INHERITS`
+ (default) hands the `ADMIN` role and steward duty to a co-admin who already holds the
+ replicated PII; `MEMBER_VOTE` / `PROJECT_CLOSES` per the proposal. On `PROJECT_CLOSES`,
+ AppView drops the project + ACL; local PII remains with whoever holds it (E2E reality).
+- **Revocation caveat:** removing a DID from the ACL stops *future* access, but PII
+ already exchanged to them cannot be recalled (same E2E truth as D4 §8). The ACL gates
+ the *channel*, not memory. Surface this honestly in the UI.
+
+## 9. Membership policy & join flows
+
+- **Join policy** (`join_policy`): `OPEN` / `APPROVAL_REQUIRED` (admin approves) /
+ `INVITE_ONLY` / `HAPLOGROUP_VERIFIED` (must match a haplogroup, checked against
+ non-PII fed/assertion data). Applies to **collaborators** joining the team.
+- **Sovereign members joining** (proposal's flow): a member with a DID joins, becomes a
+ `subject` *and* (optionally) a low-privilege team member; sets their own
+ per-field visibility (which then *replaces* the admin's consent-flag cap for their
+ data). This is the claim/sovereign path (D2 §6) at project granularity.
+- **Invite** rides D1 (a signed invite → consent → `project_member` insert).
+
+## 10. Lifecycle (per project, mixed subjects allowed)
+
+```
+Admin creates project P (stewarded) → research.project + ADMIN project_member
+Admin imports FTDNA roster → pseudonymous subjects + subject_membership (D2)
+Co-admins invited → project_member rows; steward P2P-replicates PII (§8)
+Team collaborates → R2 assertions (AppView) + R3 PII (P2P) gated by ACL (§6)
+Member M onboards + claims subject S → S.custody_did=M; M becomes a team member;
+ M's per-field visibility supersedes the consent cap
+Project matures → more subjects sovereign; admins govern, members own
+```
+
+## 11. Module placement & schema deltas
+
+- **AppView:** `research.project` + `research.project_member` (new, mig in the
+ `research.*` set with D2/D4); `du-db::research::project` (ACL checks, role/permission
+ queries) consumed by the D1 broker endpoints and the D4 assertion API; the
+ `groupProject` PDS record ingested by du-jobs into `research.project`.
+- **Navigator:** local project + team roster mirror; the invite/consent + PII-replicate
+ flows over D1; per-subject visibility/consent UI.
+- **Shared `du-domain`:** the role/permission enum + the capability→permission map (so
+ Edge and AppView agree on who-can-do-what).
+- **Reconcile with the proposal:** D5 supersedes the proposal's *governance/membership*
+ sections (now AppView-enforced ACL) and *adopts* its roles/policies/succession; the
+ proposal's aggregate records map onto D4 (§7). The proposal's member-sovereign
+ visibility model = the post-claim state.
+
+## 12. Open questions / decisions
+
+1. **PII replication default** — auto-replicate every subject's PII to all co-admins
+ (max durability, max exposure within the consented circle) vs. on-demand per
+ subject. Recommend **auto-replicate within the team** (the circle is already
+ consented; durability matters) with a per-subject opt-out.
+2. **Sovereign-member privilege** — does a claimed member auto-get a team role, or stay
+ subject-only until invited? Recommend **subject-only + self-visibility control**;
+ team roles remain invite-gated.
+3. **`HAPLOGROUP_VERIFIED` join** — checked against which non-PII signal (fed haplogroup
+ reconciliation / a `HAPLOGROUP_IS` assertion)? Define.
+4. **Project as PDS record owner** — the proposal makes `groupProject` an admin's PDS
+ record; confirm AppView treats it as the source of truth (ingest) vs. AppView-native.
+ Recommend **PDS record = source, AppView mirrors** (consistent with the rest).
+5. **Cross-project subject** — a subject in two projects: confirm per-project assertion
+ isolation (D4 §11.5) and that PII replication is per-project (no leakage across).
+
+## ✅ AppView ACL BUILT (2026-06-12)
+
+The collaborator-team ACL is built. **Reconciliation:** reused the existing
+`social.group_project` (mig 0009) as the project (not a new `research.project`);
+`owner_did` is the founding ADMIN. Added `research.project_member` (mig 0034:
+project_id → social.group_project, member_did, role, permissions[], appointed_by,
+joined_at, left_at). `du_db::research`: `Role` (ADMIN/CO_ADMIN/MODERATOR/CURATOR) +
+`Capability` + `Role::allows` (the §4 map), `role_of` (owner⇒ADMIN, else live
+project_member), `is_team_member`, `can`, `add_member`/`revoke_member`(left_at)/
+`members_of`. **Wired in:** D2's register is now `ManageSubjects`-gated (ADMIN/CO_ADMIN),
+the subjects read is team-member-gated; D1's project-scoped request + consent require
+the actor be a live team member (`exchange::request_meta` + `project_scope_id`). Team
+endpoints `/api/v1/research/project/{member,member/revoke,members}` (signed, ADMIN-gated).
+Memory `group-project-acl`. **Remaining (Navigator/D4):** the groupProject PDS-record
+ingest (§12 Q4); invite/join + PII replication/succession over D1 (§8/§9); the D4
+capabilities (WriteAssertions/ResolveDispute/PromoteToCatalog defined, enforced when D4
+lands); shared `du-domain` role enum; granular `permissions[]` overrides.
+
+## 13. Next step — Platform track complete
+
+D1–D5 are the full collaboration platform: **channel (D1) · pseudonymous registry (D2)
+· genetic resolver (D3) · attributed claims with split rails (D4) · projects + ACL +
+lifecycle (D5)** — with the no-PII-in-AppView invariant intact end to end. Buildable
+order across the track: D1 `du-exchange` + broker → D2 `research_subject` + id-exchange
+→ D4 `research.assertion` + rails → D5 `project`/`project_member` ACL → D3 IBD on the
+same channel. The **Catalog track (D6 discovery automation, D7 multi-test-type, D8
+sequencer-lab)** is independent and can proceed in parallel whenever the team chooses.
diff --git a/documents/planning/design-doc-triage-report.md b/documents/planning/design-doc-triage-report.md
new file mode 100644
index 00000000..764cf1d6
--- /dev/null
+++ b/documents/planning/design-doc-triage-report.md
@@ -0,0 +1,450 @@
+# Design-Doc ↔ Rust Triage Report
+
+**Started:** 2026-06-07. **Purpose:** walk the original (pre-rewrite) planning
+design docs one by one, compare each against the actual Rust implementation, and
+record a triage verdict + recommended action **for later action** (nothing is
+changed by this report itself).
+
+**Scope:** the 11 original docs in `documents/planning/` (excludes the new
+`d1`–`d5` + `design-roadmap-rust-rewrite.md`, which are current). The
+`documents/proposals/` set (Bucket B) is out of scope here.
+
+**Verdict legend**
+- ✅ **Doc current** — matches the code; no action.
+- 📝 **Update doc** — code is the source of truth; doc is stale/drifted.
+- 🔧 **Make code compliant** — doc is the intended design; code should change.
+- ⚖️ **Split** — some of both (note which parts).
+- 🗑️ **Deprecate/supersede** — doc describes a dropped or superseded approach.
+
+## Execution log (2026-06-07)
+
+- **Reconciliation headers added** to the kept docs (#2, #3, #5, #6, #7, #8, #11);
+ #1 already had one.
+- **Removed** the three superseded docs (#4 jsonb-consolidation, #9 ibd-matching,
+ #10 appview-pds-backfeed) per "if superseded, just remove it." Their inbound
+ references inside `documents/planning/` were rewired (→ D1/D3 for IBD; "realized
+ in mig 0002/0004" for JSONB; "dropped, member-claim carve-out under D1/D4" for
+ backfeed). The PDS-backfeed **member-claim carve-out** is preserved in the
+ design-roadmap gap catalog + D2/D5.
+- **Still to do (later passes):** references to the removed docs remain in
+ `documents/atmosphere/` (00-Overview, 04-Ancestry-Records, 06-IBD-Matching-Records,
+ Executive-Summary) and `documents/proposals/` (branch-age-estimation,
+ group-project-system) — clean up when those doc sets are triaged.
+
+## Status index
+
+| # | Doc | Verdict | Action owner |
+|---|-----|---------|--------------|
+| 1 | variant-naming-authority.md | ✅ Doc current | — (optional code nicety) |
+| 2 | tree-versioning-system.md | 📝 Update doc | docs |
+| 3 | openalex-publication-discovery.md | 📝 Update doc (light) | docs |
+| 4 | jsonb-consolidation-analysis.md | 🗑️ REMOVED (realized) | done |
+| 5 | multi-test-type-roadmap.md | ⚖️ Split (reconcile built; rest is forward = D7) | docs + forward |
+| 6 | sequencer-lab-inference-system.md | ⚖️ Split (schema built incl. consensus; logic forward = D8) | docs + forward |
+| 7 | haplogroup-discovery-system-overview.md | ✅ Doc current (minor terminology) | docs (light) |
+| 8 | haplogroup-discovery-system.md | ⚖️ Split (curator/pool half built; engine forward = D6; arch evolved) | docs + forward |
+| 9 | ibd-matching-system.md | 🗑️ REMOVED (→ D1 + D3) | done |
+| 10 | appview-pds-backfeed-system.md | 🗑️ REMOVED (dropped; 1 carve-out) | done |
+| 11 | post-mvp-roadmap.md | 📝 Update / reconcile with design-roadmap | docs |
+
+---
+
+## 1. variant-naming-authority.md — ✅ Doc current
+
+**Compared against:** `du_db::naming` (`crates/du-db/src/naming.rs`),
+`du_db::variant`, migration 0016, `/curator/naming`, `/api/v1/variants/export.gff`.
+
+**Finding:** already reconciled — the doc carries an accurate `Implementation
+status (2026-06, Rust)` header and the code matches it: `DU` sequence
+(`core.next_du_name()`), lifecycle `UNNAMED→PENDING_REVIEW→NAMED`, mint preserves
+the prior name as a `common_names` alias, local same-coordinate dedup
+(`dedup_by_coordinates`, GRCh38), curator queue with modes, GFF3 propagation
+export. The two "Not yet" items are genuine future work, not violations:
+- **Live external (YBrowse/ISOGG/YFull) dedup lookup** — not built; "check
+ external names" is a manual curator step.
+- **Unnamed variants in the public API/domain** — `du-domain::Variant.canonical_name`
+ is still `String`; the code sidesteps it by filtering `canonical_name IS NOT NULL`
+ on every public path, so unnamed variants never flow through that type.
+
+**Latent edge (low severity):** `du_db::variant::get_by_id` selects
+`canonical_name` into a non-`Option` without a NULL filter, so
+`GET /api/v1/variants/{id}` on an *unnamed* variant id would 500 on row decode.
+Unreachable via normal UX (unnamed ids aren't surfaced anywhere public).
+
+**Recommended action (later, optional):** harden `get_by_id` to tolerate/404 a
+NULL `canonical_name`. Full compliance (Option in `du-domain::Variant`) is a
+cross-repo change the doc already defers. No doc change needed.
+
+---
+
+## 2. tree-versioning-system.md — 📝 Update doc
+
+**Compared against:** `tree.change_set` / `tree.tree_change` (mig 0001 enum +
+0003), `tree.wip_*` staging tables, `tree.curator_action` (mig 0010),
+`du_db::change_set`, `routes/change_sets.rs` + `routes/reviews.rs` + `/manage/*`
+(`routes/versioning.rs`).
+
+**Finding:** the *design* is correct and implemented — the doc recommends **Option
+B (overlay change-sets)** and that's exactly what Rust built (`tree.change_set`
+and `tree.tree_change`; **no `tree.tree_version` table** — Option A was not taken;
+audit in `tree.curator_action` as described). But every concrete specific is
+Scala-era and has drifted:
+
+- **Code/types:** Scala `case class` / `Future` / `trait TreeVersioningService` /
+ `*.scala.html` → Rust `du_db::change_set` + axum routes + Askama.
+- **Schema:** `SERIAL` / `VARCHAR CHECK(...)` / `TIMESTAMP` / added
+ `tree_version_id` columns → `BIGINT IDENTITY` / native enum
+ `tree.change_set_status` / `TIMESTAMPTZ` / the existing **temporal**
+ (`valid_from`/`valid_until`) model (no version-id columns).
+- **API:** documented public `/api/v1/tree/change-sets` + `/api/v1/curator/changes/*`
+ **do not exist**. Reality: `/curator/change-sets/*` + `/curator/reviews/*` (UI) and
+ `/manage/change-sets/*` + `/manage/haplogroups/merge[/preview]` (machine). Change-sets
+ are deliberately **not** in the public `/api/v1`.
+- **Permissions:** granular `tree.version.*` → the single **`Curator`** role guard
+ (Admin/TreeCurator/Curator).
+- **Ambiguity handling (substantive evolution):** the doc describes a **file-based
+ `ambiguity_report_path`** + an in-change-set review. Rust replaced this with the
+ **`tree.wip_*` staging tables + a dedicated `/curator/reviews` resolution flow**
+ (REPARENT/MERGE_EXISTING/DEFER), enacted by the change-set apply engine. The doc
+ doesn't mention this layer at all.
+
+**Recommended action (later):** **update the doc to match the code.** Add a
+`Rust implementation status` reconciliation header (as variant-naming-authority.md
+has), correct the schema/API/permissions specifics, and add a section on the
+`wip_*` + `/curator/reviews` merge-review layer that superseded the file-report
+approach. Keep the Option A/B rationale as historical design context. Cross-link
+the refreshed user guide (`../curator-guide-tree-versioning.md`). No code changes
+needed — the system is built and working.
+
+---
+
+## 3. openalex-publication-discovery.md — 📝 Update doc (light)
+
+**Compared against:** `pubs.publication_candidate` + `pubs.publication_search_config`
+(mig 0006), `du_db::publication` (`enabled_search_configs`, `upsert_candidate`,
+`promote_candidate`, `review_candidate`), `du_jobs::publications`
+(`publication-update`, `publication-discovery`), `routes/publications.rs`
+(`/curator/publications`), `du_external::openalex`.
+
+**Finding:** design is sound and **substantially implemented** — scheduled
+discovery runs each enabled search config and upserts candidates; candidates
+dedupe by `openalex_id`; the curator review queue (`/curator/publications`) does
+**accept (promote to a reference) / reject / defer**. So the doc's Phase-1 "simple
+curator review UI" (shown `[ ]`) is in fact **done**. Drift to fix:
+
+- **Stale specifics:** Scala `OpenAlexService.scala` / `PublicationService`,
+ **Pekko Quartz** cron, `public.users`, `SERIAL`, plural/unprefixed table names,
+ and the `/api/private/publication-candidates/*` endpoints → Rust
+ `du_external::openalex` + `du_db::publication` + the tokio scheduler +
+ `pubs.publication_candidate` (singular) + the `/curator/publications` UI (no
+ public candidate API).
+- **Schedule:** documented weekly cron (Sun 02:00) → Rust runs **daily**
+ (`Duration::from_secs(86_400)`), config-gated.
+- **Phase status drift:** Phase 1 complete incl. the curator UI; **not built:**
+ relevance scoring (Phase 2 — the `relevance_score` column exists but isn't
+ computed), smart discovery (Phase 3), biosample-extraction hints (Phase 4), and
+ the `publication_search_run` history/debug table.
+- **Addition not in the doc:** the public **"suggest a paper"** on-ramp
+ (`/references/submit`: DOI → OpenAlex resolve → candidate queue).
+
+**Recommended action (later):** light doc refresh — Rust reconciliation header,
+fix the schema/endpoint/scheduler specifics, correct the phase checkboxes, add the
+`/references/submit` on-ramp, and keep relevance scoring + `search_run` as explicit
+forward work. No code changes required.
+
+---
+
+## 4. jsonb-consolidation-analysis.md — 🗑️ Superseded (recommendations realized)
+
+**Compared against:** migrations 0002 (`core.biosample.original_haplogroups`) and
+0004 (`genomics` header + `sequence_file`, `alignment_metadata`).
+
+**Finding:** this is a **pre-rewrite analysis** recommending 7 child-table → JSONB
+consolidations, and the Rust redesign **implemented all of them** (mig 0004's
+header explicitly enumerates the same moves):
+- `sequence_file_checksum` / `_http_location` / `_atp_location` → `sequence_file`
+ `checksums` / `http_locations` / `atp_location` JSONB ✓ (child tables gone)
+- `alignment_coverage` / `pangenome_alignment_coverage` → `coverage` JSONB on the
+ metadata tables ✓ — **with the recommended expression index**
+ (`((coverage->>'meanDepth')::double precision)`)
+- `biosample_original_haplogroup` / `citizen_*` → `core.biosample.original_haplogroups`
+ JSONB ✓ (and the three biosample tables collapsed to one)
+- bonus: scattered `at_uri`/`at_cid` → a single `atproto` JSONB ✓
+
+Nothing to make compliant — the code already embodies (and slightly exceeds) the
+analysis. Only nit: implemented coverage keys are camelCase (`meanDepth`,
+`medianDepth`) alongside `percent_coverage_at_*x`, vs the doc's snake_case
+proposal — cosmetic, no action.
+
+**Recommended action (later):** treat as **historical/implemented** — add a short
+"realized in the Rust redesign (mig 0002/0004)" note at the top, or archive it.
+No code or design action.
+
+---
+
+## 5. multi-test-type-roadmap.md — ⚖️ Split (reconcile the built part; rest is forward = D7)
+
+**Compared against:** `genomics.test_type_definition` + `genomics.coverage_expectation_profile`
+(mig 0004), `core.data_generation_method` / `core.target_type` enums,
+`sequence_library.test_type_id` FK, `du-domain` `DataGenerationMethod`/`TargetType`,
+`fed.genotype` (mig 0012).
+
+**Finding — built foundation (~Phase 1):**
+- `genomics.test_type_definition` exists — **leaner** than the doc's spec: has
+ code/display_name/category/vendor/target_type/expected_min_depth/supports_*/
+ typical_file_formats/description, but **omits** `expected_target_depth`,
+ `expected_marker_count`, `version`, `release_date`, `deprecated_at`,
+ `successor_test_type_id`, `documentation_url`. Coverage thresholds live in a
+ separate `genomics.coverage_expectation_profile` (not inline columns).
+- `core.data_generation_method` (SEQUENCING/GENOTYPING) + `core.target_type`
+ native enums; `du-domain` mirrors them. ✓
+- `sequence_library.test_type_id` is a **native FK from the start** — the doc's
+ Phase-1 "[ ] migrate the string column to an FK" is moot (no string column).
+- **Seed data NOT loaded** — the table is empty in migrations (the doc shows seed
+ as `[X]`; in Rust it's outstanding, per the design-roadmap "seed test_type_definition").
+
+**Finding — not built (forward, = design-roadmap D7):** everything in Phases 2–6 —
+`test_type_target_region`, `genotyping_test_summary` (local; partly shadowed by the
+federated `fed.genotype` summary), `test_type_haplogroup_marker_coverage`,
+`test_type_marker_intersection`; the `TestTypeService` + `/api/v1/test-types/*` and
+`/api/v1/haplogroup-variants/*` APIs; chip-metadata ingest; **test-type-aware
+haplogroup confidence**; cross-test-type IBD. Also tightly coupled to the (also
+forward) haplogroup-discovery doc.
+
+**Drift:** Scala throughout (Slick case classes, `Future` service traits,
+`models.domain.genomics`), Pekko, removed `/api/private` endpoints — all need
+restating in Rust terms when the forward parts are built.
+
+**Recommended action (later):** **keep the doc as the forward design (D7) but
+reconcile the built part** — add a Rust status header: Phase-1 schema is built
+(note the leaner `test_type_definition` + separate `coverage_expectation_profile`
+plus native `test_type_id` FK), seed data is still TODO, and Phases 2–6 remain
+forward; restate their schema/services in Rust terms when picked up. The core
+indexing principle (index Y/mt variants + summaries, never raw autosomal) is
+correct and already matches the implemented federation posture. No code change
+required now beyond (optionally) loading the test-type seed.
+
+---
+
+## 6. sequencer-lab-inference-system.md — ⚖️ Split (schema built incl. consensus; logic forward = D8)
+
+**Compared against:** `genomics.sequencing_lab`, `genomics.sequencer_instrument`,
+`genomics.instrument_observation`, `genomics.instrument_association_proposal`
+(mig 0004), `fed.sequencerun.instrument_id` (mig 0012). No lab-lookup/consensus
+code found (only `coverage.rs`/`fed::core` touch `instrument_*` for benchmarks).
+
+**Finding — schema built (more than the roadmap's ~20% implies):** all four tables
+exist, **including the two the doc marks as NEW/`[ ]`** — `instrument_observation`
+and `instrument_association_proposal`. So the consensus data model is in place.
+Schema deltas to reconcile:
+- Tables are in `genomics`, native `BIGINT IDENTITY` (doc: `public.*`, `SERIAL`);
+ `sequencing_lab` is leaner (no created/updated_at).
+- **`sequencer_instrument` has no `lab_id` FK** and a different column set
+ (`model_name`, `manufacturer`, `year_introduced`, `estimated_max_throughput`) —
+ i.e. instrument↔lab is intended to resolve via observation→proposal→accept, not a
+ static FK. The doc's proposed `sequencer_instrument` add-ons
+ (`source`/`observation_count`/`confidence_score`/`last_observed_at`) are **not**
+ present (that state lives in the proposal table instead).
+
+**Finding — zero logic (forward = design-roadmap D8):** none of it is wired —
+no `/api/v1/sequencer/lab` lookup, no `/api/v1/labs/{instrument-id}`, no Firehose
+`instrumentObservation` ingestion, no consensus/confidence engine, no curator
+instrument-proposal review UI. The "existing API endpoints / domain models" the doc
+lists are **Scala-era and do not exist** in Rust. The consensus source in Rust is
+`fed.sequencerun.instrument_id` (crowdsourced @RG id); the
+`com.decodingus.atmosphere.instrumentObservation` lexicon + its `fed.*` mirror are
+**not yet defined** (design-roadmap notes the record shape is TBD).
+
+**Drift:** Scala/Slick/Tapir/Pekko throughout; "Current State" lists endpoints that
+were never ported.
+
+**Recommended action (later):** keep as the forward design (D8) but reconcile —
+Rust status header: the **full schema (incl. consensus + proposal tables) is in
+place; logic is unbuilt**; fix the schema specifics (genomics schema, no `lab_id`
+FK, actual instrument columns, observation/proposal tables already present); note
+the consensus source is `fed.sequencerun.instrument_id` and the observation
+lexicon/mirror is still to define; restate services/APIs in Rust terms (axum +
+utoipa; Firehose = the existing Jetstream consumer). Drop the "existing endpoints"
+section. No code change required now.
+
+---
+
+## 7. haplogroup-discovery-system-overview.md — ✅ Doc current (minor terminology)
+
+**Compared against:** `tree.proposed_branch*` / `tree.biosample_private_variant` /
+`tree.discovery_config` (schema), `du_db::proposal`, `/curator/proposals`
+(review/promote) + `/manage/curation/proposals` intake.
+
+**Finding:** this is a **stack-agnostic conceptual overview** (discover → correlate
+→ propose → review → evolve; evidence sources; thresholds; curator workflow;
+privacy/visibility; federated model). It still describes the intended system
+accurately, and the curator-review half it describes **is built**. No
+implementation specifics to drift. Two minor nits:
+- **Terminology:** "Firehose / real-time stream" → the implemented inbound path is
+ the **Jetstream summary mirror** (`fed.*`); the credential-holding inbound
+ firehose was dropped. The *concept* (Edge → PDS → stream → AppView discovery)
+ still holds.
+- **Auto-promotion** ("10+ samples → can be automatically accepted") is aspirational
+ — curator accept is the gate today; the automated ingest→consensus engine is
+ forward (= design-roadmap D6).
+
+**Recommended action (later):** optional one-line note that ingestion is via the
+Jetstream summary mirror (not a credential-holding firehose) and that
+auto-promotion is a future option. Otherwise leave as-is.
+
+---
+
+## 8. haplogroup-discovery-system.md — ⚖️ Split (curator/pool half built; engine forward = D6; architecture evolved)
+
+**Compared against:** `du_db::proposal` (`crates/du-db/src/proposal.rs`),
+`/curator/proposals` (review/promote) + `/manage/curation/proposals` intake,
+`tree.proposed_branch` / `_evidence` / `_variant`, `tree.biosample_private_variant`,
+`tree.discovery_config`, `tree.wip_*`. *(Triaged from the doc's
+Prerequisites/Architecture + the overview + the cross-references in
+multi-test-type-roadmap.md + the confirmed schema/code, rather than a full read of
+all 71 KB.)*
+
+**Finding — done:**
+- The doc's **prerequisite** (`variant-schema-simplification`: universal JSONB
+ coordinates, parallel-mutation handling, JSONB aliases) is **implemented** in the
+ Rust variant model.
+- Schema is present (proposed_branch + evidence + variant, biosample_private_variant,
+ discovery_config, wip_*).
+- The **curator review/promote + proposal pooling** half is **built**:
+ `proposal.rs` pools submissions by (proposed_name, parent) across submitters,
+ tracking `evidence_count`/`submitter_count`/`confidence`/`status`; curators work
+ `/curator/proposals` (review/promote); machine intake at `/manage/curation/proposals`.
+
+**Finding — forward + architecture evolved (= design-roadmap D6):** the doc
+specifies an **AppView-side pipeline** — *Private Variant Extraction* that parses
+`HaplogroupResult.mismatchingSnps` from ingested biosamples (Citizen Firehose +
+External upload) → groups → ProposedBranch → consensus detection. Rust **inverts the
+ingestion model**: **Navigator (Edge) extracts the private variants and submits a
+proposal; the AppView pools across submitters** — there is no AppView-side raw-extraction
+from `fed.biosample`. This is consistent with the no-PII / edge-compute direction.
+The automated consensus/Jaccard engine + auto-reassignment remain unbuilt (D6 — the
+`du-domain` algorithm spec is the open design piece).
+
+**Drift:** Scala/Slick/Tapir, `Firehose`, `/api/v1/discovery/proposals` +
+`/api/v1/curator/proposals/{id}/accept` → Rust `/curator/proposals/*` +
+`/manage/curation/proposals`.
+
+**Recommended action (later):** keep as the forward design (D6) but reconcile
+substantially — Rust status header (prereqs + schema + curator/pooling half done);
+**document the ingestion-model change** (Edge-submits-proposals, not AppView-side
+extraction from `mismatchingSnps`); restate endpoints/services in Rust terms; mark
+the consensus/Jaccard engine + auto-reassignment as the remaining D6 work. No code
+change required now.
+
+---
+
+## 9. ibd-matching-system.md — 🗑️ Superseded by D1 + D3
+
+**Compared against:** `ibd.*` schema (mig 0007: `match_request`, `match_consent`,
+`match_suggestion`, `ibd_discovery_index`, `ibd_pds_attestation`, `population_*`),
+the new `d1-encrypted-edge-exchange.md` + `d3-ibd-matching-impl.md` (read in full
+earlier). **No `du_db::ibd` code exists** (schema-only).
+
+**Finding:** this is the **original (Scala/Tapir-era) IBD requirements** doc — it
+invents its own crypto, key exchange, and P2P channel, and references the Java Edge
+App. It is **explicitly superseded** by the two new docs we just added:
+- **D1** generalizes its crypto/consent/channel into the shared `exchange.*`
+ substrate (D1's own note: it "supersedes/generalizes the crypto + Edge-coordination
+ sections of ibd-matching-system.md"; it also fixes the Ed25519-can't-ECDH gap).
+- **D3** is the **Rust build spec** that "implements the requirements in
+ ibd-matching-system.md on top of D1," folding `ibd.match_request`/`match_consent`
+ into `exchange.*` and keeping the IBD-specific tables.
+
+So the doc's value is now purely as **historical requirements**; the authoritative
+design is D1 + D3. IBD itself is **unbuilt** (schema present, logic forward — D3
+closes the Match track).
+
+**Recommended action (later):** add a header marking it **superseded** — point
+crypto/channel/key-exchange → `d1-encrypted-edge-exchange.md`, the Rust impl →
+`d3-ibd-matching-impl.md`; keep the body as historical requirements (or archive).
+No code change (build per D3 when the Match track is scheduled).
+
+---
+
+## 10. appview-pds-backfeed-system.md — 🗑️ Superseded/dropped (one open carve-out)
+
+**Compared against:** `rust/README.md` + STATUS (federation is **outbound-only**),
+the `[[atproto-federation-direction]]` decision (drop private firehose; use
+permissions/OAuth + notify-fetch), the design-roadmap Q2. No backfeed code exists
+(correctly absent).
+
+**Finding:** the doc designs a **bidirectional AppView→PDS backfeed** that pushes
+refined/derived data (haplogroup refinement, branch discovery, ancestral STR/TMRCA,
+matches, lab inference) back into user PDSes. The Rust rewrite **dropped this
+direction**: federation is an **outbound Jetstream summary mirror** (Navigator
+publishes → `fed.*`) plus a notify-fetch posture; the inbound firehose + PDS-fleet
++ backfeed model is out of scope. So the doc describes a **non-chosen
+architecture**.
+
+**Open carve-out (don't fully delete):** the design-roadmap (Q2) flags that
+**member-claim** custody (D2 §6 / D5) may need a *limited* AppView→PDS write — to
+be decided under D1/D4. So the general backfeed is dropped, but the narrow
+member-claim write is an open question.
+
+**Recommended action (later):** mark **superseded/dropped** with a header (Rust =
+outbound-only mirror + notify-fetch; no general backfeed), and record the single
+open carve-out (limited member-claim write, decide under D1/D4). Keep as historical
+/decision-input or archive. No code (correctly nothing built).
+
+---
+
+## 11. post-mvp-roadmap.md — 📝 Update / reconcile with the design-roadmap
+
+**Compared against:** current build state (per docs #1–#10 above) and the new
+`design-roadmap-rust-rewrite.md` (the current authoritative index).
+
+**Finding:** this is the **old central roadmap** indexing the six subsystem docs
+with a dependency graph + phased plan (A–F). It is **largely superseded** by
+`design-roadmap-rust-rewrite.md`. The new doc itself only lists this one as a
+"pairs with… feature sequencing" companion — but in practice the new roadmap is
+the accurate one (it has the gap catalog, the two-track D1–D8 sequencing, and the
+no-PII reconciliation). Specific drift:
+- **Stale statuses:** Phase A (tree schema / `test_type_definition` /
+ `sequence_file` JSONB) is correctly `[X]`, and OpenAlex candidate queue `[X]` —
+ but it misses that the **curator proposal/review half**, **tree versioning**, and
+ **multi-test + sequencer-lab schema** are now built; and its "In Progress /
+ Planned" labels predate that.
+- **Omits the entire collaboration/IBD-via-D1 platform** (D1–D5) — it still lists
+ IBD as the standalone `ibd-matching-system.md` (now superseded by D1+D3).
+- **Scala terms** throughout (Firehose, `PrivateVariantExtractionService`,
+ `publication_candidates` plural, etc.).
+
+**Recommended action (later):** **reconcile with `design-roadmap-rust-rewrite.md`**
+— either demote post-mvp-roadmap to historical with a header pointing at the new
+roadmap as authoritative, or refresh its status table + terminology and graft in
+the D1–D5 platform track. Keep its still-useful bits (per-phase detail,
+JSONB-distributed-across-phases plan, success metrics). No code action.
+
+---
+
+## Summary of verdicts
+
+| Verdict | Docs |
+|---------|------|
+| ✅ Doc current | #1 variant-naming-authority, #7 discovery-overview (minor terminology) |
+| 📝 Update doc | #2 tree-versioning, #3 openalex (light), #11 post-mvp-roadmap |
+| ⚖️ Split (reconcile built + forward design) | #5 multi-test-type (D7), #6 sequencer-lab (D8), #8 discovery (D6) |
+| 🗑️ Superseded / dropped | #4 jsonb-consolidation (realized), #9 ibd (→ D1+D3), #10 backfeed (dropped; 1 carve-out) |
+
+**Cross-cutting themes**
+- **No code is wrong.** Every verdict is "update the doc," never "make the code
+ comply" — the Rust build is the source of truth; the pre-rewrite docs carry
+ Scala/Slick/Tapir/Pekko/Firehose specifics, stale schemas/endpoints, and
+ out-of-date status.
+- **Recurring fixes:** add a "Rust implementation status" reconciliation header
+ (as variant-naming-authority.md already has); swap Scala→Rust specifics; correct
+ `/api/v1/*` + `/curator/*` + `/manage/*` route surfaces; replace granular
+ `tree.version.*`/`*.permission` with the `Curator` role; "Firehose" → the
+ outbound **Jetstream** summary mirror.
+- **Two architecture evolutions to capture:** (a) discovery ingestion is
+ **Edge-submits-proposals**, not AppView-side extraction (#8); (b) IBD crypto/
+ channel is now the shared **D1 `exchange.*`** substrate (#9).
+- **One open product decision:** the limited **member-claim** AppView→PDS write
+ (#10), to be decided under D1/D4 (design-roadmap Q2).
+- **Forward design that's still valid** lives in #5/#6/#8 (= design-roadmap
+ D6–D8) and should be kept (reconciled), not discarded.
diff --git a/documents/planning/design-roadmap-rust-rewrite.md b/documents/planning/design-roadmap-rust-rewrite.md
new file mode 100644
index 00000000..7d6bc87f
--- /dev/null
+++ b/documents/planning/design-roadmap-rust-rewrite.md
@@ -0,0 +1,210 @@
+# AppView (decodingus) — Design-Gap Roadmap for the Rust Rewrite
+
+**Status:** Living index. Drafted 2026-06-06.
+**Purpose:** One map of *what design work remains* for the Rust AppView, what
+already has a doc, what must be **reconciled** with the new Navigator-side
+genealogical-platform direction, and a recommended **order**. This is a
+navigational doc — it points at the real design docs (existing and to-write), it
+does not restate them.
+
+**Pairs with:** `rust/STATUS.md` (build status), `planning/post-mvp-roadmap.md`
+(feature sequencing). Navigator-side companions live in the **DUNavigator** repo:
+`docs/design/ftdna-project-import.md` and `docs/design/academic-ena-import.md`.
+
+## 1. Where the rewrite stands
+
+Per `rust/STATUS.md` (2026-06-05): the **spine is done and cutover-verified**.
+Built: schema (migrations 0001–0022), `du-db` query layer, public HTML/HTMX + JSON
+API, auth + curator tools, haplotree build/merge/versioning, SNP-graft + review,
+YBrowse mirror→reconcile (~3M variants), Y-STR signatures/prediction/age, variant
+naming authority, ETL (verified on a real prod dump), and **federation reporting**
+(Jetstream → `fed.*` mirror + report endpoints).
+
+Launch-critical path is just **(1) cutover execution** + alias-aware mt resolution,
+and **(2) the live cross-host AT Proto OAuth test**. Everything below is the
+**post-launch feature mass** — and it's where the design gaps are.
+
+## 2. The two buckets of remaining design
+
+**Bucket A — documented subsystems, not yet built in Rust.** Each has a planning
+doc; the gap is a *Rust-implementation spec* (exact SQL, state machines, endpoints)
+and reconciliation with the new schema. Mostly schema-only today.
+
+**Bucket B — the collaboration / genealogy-platform layer.** This is what the
+Navigator FTDNA work (`ftdna-project-import.md` §8) depends on. It is **partly
+covered by older proposals** (`proposals/group-project-system.md`,
+`Messaging_and_Feed_System.md`, `Reputation_System_Implementation.md`) — but those **predate** the
+ResearchSubject/assertion model *and take the opposite privacy stance* (see §3).
+The gap here is **reconciliation + the net-new pieces**, not greenfield.
+
+## 3. The central reconciliation — RESOLVED (2026-06-06): no PII in AppView
+
+The apparent tension between the privacy-first `group-project-system.md` and the
+Navigator FTDNA design is **decided in favor of the privacy-first stance**:
+
+> **AppView holds NO PII. It is a pure broker.** It keeps its anonymized/aggregate-
+> only posture (the `fed.*` mirror drops donor PII at ingest). Member PII — names,
+> MDKA, kit↔identity linkage — is exchanged **admin-to-admin over an encrypted
+> Edge-to-Edge (P2P) channel**, the **same mechanism the IBD system uses** for
+> genetic comparison (now D1/D3: ECDH X25519 + AES-256-GCM,
+> AT-Proto-brokered handshake, P2P/relay transport). AppView coordinates discovery,
+> consent, and key exchange, and persists **PII-free** match/assertion *state*.
+
+This **reinforces** `group-project-system.md` (member-sovereign, refs-not-copies)
+and **corrects** the earlier Navigator draft (which had PII landing in an AppView
+private tier — now amended in `ftdna-project-import.md` §8 to P2P-only).
+
+The bootstrap→sovereign **lifecycle still holds**, but no server-side PII copy
+exists at any stage:
+
+```
+[Admin-stewarded bootstrap] [Member-sovereign steady state]
+ admin imports FTDNA project ──► member onboards, proves kit control,
+ PII stays LOCAL; shared with CLAIMS their ResearchSubject ──► custody
+ co-admins via encrypted P2P (DID) moves to them; they decide their
+ (our FTDNA on-ramp) own visibility (group-project-system.md)
+```
+
+**Consequence — one shared substrate.** Because both IBD comparison and genealogy-
+PII exchange need the same encrypted Edge-to-Edge channel + AppView broker, **design
+it once** (§5, D1) and let both tracks ride it. This is the highest-leverage
+foundational piece; it underpins Bucket B and the IBD impl alike.
+
+## 4. Gap catalog
+
+Legend: ✅ done · ◐ partial · ☐ schema-only · ➖ dropped · ✎ has design doc · ✶ net-new design needed
+
+### Bucket A — finish the documented subsystems
+
+| Subsystem | Code | Schema | Design doc | Remaining design work |
+| --- | --- | --- | --- | --- |
+| **IBD matching** | ☐ | `ibd` (mig 0007) | → D1 + D3 (orig planning doc removed) | Designed: candidate-pair mining SQL over `fed.*`, dual-consent state machine, Edge↔AppView handoff, match-list endpoints — see `d3-ibd-matching-impl.md` on `d1-encrypted-edge-exchange.md`. **Reused by Bucket B's cross-admin resolver.** |
+| **Haplogroup-discovery automation** | ◐ (curator half ✅) | `tree.proposed_branch`/`wip_*`/`discovery_config` | ✎ `planning/haplogroup-discovery-system.md` (71 KB) | The *ingest→consensus engine*: private-variant extraction from `fed.biosample`/`fed.str_profile`, Jaccard/consensus + thresholds, sample de-dup, auto-reassignment on accept. du-domain algorithm spec. |
+| **Multi-test-type** | ◐ ~30% | `genomics.test_type_definition` (mig 0004/0014) | ✎ `planning/multi-test-type-roadmap.md` (47 KB) | Marker-coverage + target-region reference tables; **test-type-aware confidence** (Big Y-700 vs chip); seed `test_type_definition`. Feeds discovery confidence. |
+| **Sequencer-lab inference** | ◐ ~20% | `genomics` lab/instrument | ✎ `planning/sequencer-lab-inference-system.md` (30 KB) | Public `GET /api/v1/labs/{instrument-id}`; consensus from `fed.instrumentObservation` (record shape not yet defined); curator review + confidence scoring. |
+| **OpenAlex pub discovery** | ◐ | `pubs` | ✎ `planning/openalex-publication-discovery.md` | Mostly built; finish discovery/enrichment edges. Low risk. |
+| **JSONB consolidation** | ✅ realized | mig 0002/0004 | (removed — done) | Realized in the Rust redesign (7 child tables → JSONB on parents). No action. |
+| **PDS backfeed** | ➖ dropped | — | (removed — superseded) | Outbound-only mirror; general backfeed dropped. **Open carve-out:** a *limited* AppView→PDS write for member-claim (§3) — decide under D1/D4. |
+
+### Bucket B — collaboration / genealogy platform
+
+| Piece | Code | Schema | Existing proposal | Remaining design work |
+| --- | --- | --- | --- | --- |
+| **Group projects** | ☐ | `social` placeholder (mig 0009) | ✎ `proposals/group-project-system.md` | **Reconcile** with FTDNA on-ramp (§3); add admin-team membership + roles + ACL + audit; project = the scope boundary for assertions. |
+| **ResearchSubject registry** | ☐ | — | ✶ none | **Net-new, PII-free.** Opaque subject node + **salted `id_hashes[]`** (not raw kit#/accession), cross-admin resolution on hashed/genetic signals (reuses IBD backbone, §3), `custody_did` for member-claim. No names/MDKA. |
+| **Assertion store** | ☐ | — | ✶ none | **Net-new.** Attributed, scoped assertions, append-only + retract, conflict-with-provenance. **Split by `scope`:** non-PII → PDS records + du-jobs ingest + AppView `current_view`; **PII → encrypted P2P only, never stored in AppView.** |
+| **Encrypted P2P exchange + broker** | ☐ | `ibd` (mig 0007) partial | ✶ none (crypto now spec'd in D1) | **Net-new, SHARED with IBD (§3).** The Edge-to-Edge channel (ECDH X25519 + AES-256-GCM) + AppView broker (discovery, consent, key-exchange relay, exchange attestation). Carries IBD comparison **and** genealogy PII. Build once. |
+| **Messaging / feed** | ☐ | `social` placeholder | ✎ `proposals/Messaging_and_Feed_System.md` | Reconcile with assertion threads; the collaboration layer reuses messaging for discussion. Refresh to Rust schema. |
+| **Reputation** | ☐ | `social` placeholder | ✎ `proposals/Reputation_System_Implementation.md` | Lower priority; depends on social being live. Refresh later. |
+
+## 5. Recommended sequencing (design order)
+
+Dependency-driven. Each `D#` is a doc to write (or refresh) before the matching
+build work.
+
+1. **D1 — Encrypted Edge-to-Edge exchange + AppView broker** ✅ **DRAFTED:
+ `planning/d1-encrypted-edge-exchange.md`** *(net-new, SHARED foundation, gates
+ both tracks)* — X25519 ECDH (X3DH-lite, forward secrecy) + AES-256-GCM, identity-
+ bound via a published Ed25519-signed X25519 key (fixes the "Ed25519 can't ECDH"
+ gap), **blind store-and-forward relay** (recommended) so offline peers work,
+ generic `exchange.*` broker schema + `ExchangeEnvelope`, new shared `du-exchange`
+ crate. Lifts/generalizes the original IBD requirements (now folded into D3). Open: transport confirm,
+ relay host, generalize-now (§12).
+2. **D2 — ResearchSubject + identity resolution** ✅ **DRAFTED:
+ `planning/d2-research-subject-registry.md`** *(net-new, PII-free)* — pseudonymous
+ `research_subject` registry (`{research_subject_id, custody_did}` + memberships,
+ **no ids/hashes**), exact match via **D1 id-list exchange** (corrected the
+ rejected AppView-hash idea), genetic match via D3, member-claim custody,
+ cross-project = claim-only. **Uses** D1; **depends on** D3's resolver. Open:
+ id-exchange-vs-PSI, cross-project policy, claim proof (§10).
+3. **D3 — IBD matching impl spec** ✅ **DRAFTED:
+ `planning/d3-ibd-matching-impl.md`** *(implements the IBD requirements in Rust
+ on D1)* — candidate mining SQL over `fed.*` (haplogroup/population-overlap/shared-
+ match → `match_suggestion`), dual-consent reuses `exchange.*`, Edge handoff = a D1
+ session (`purpose=IBD_*`), summary-only attestation indexing, **relationship
+ classification feeds D2's genetic resolver** (same-person → `subject_link`).
+ Closes the Match track. Open: phasing, N² gate, algo provenance (§12).
+4. **D4 — Assertion store (split rails)** ✅ **DRAFTED:
+ `planning/d4-assertion-store.md`** *(net-new, the collaboration primitive)* —
+ attributed/scoped/append-only assertions over `research_subject_id`; **PII-class ×
+ scope → three rails** (R1 non-PII public→PDS record/ingest; R2 non-PII project→
+ AppView `research.assertion`+current_view, D5 ACL; R3 PII→D1 P2P-only, folded
+ locally, never server-side); `current_view` fold keeps disputes with provenance;
+ `SAME_PERSON_AS` drives D2 merge (D3 feeds it); branch assertions surfaced *against*
+ the curated tree. Open: NOTE PII-default, dispute authority (§11).
+5. **D5 — Group-project reconciliation** ✅ **DRAFTED:
+ `planning/d5-group-project-reconciliation.md`** *(reconciles `group-project-
+ system.md` with D1–D4)* — **two memberships** disentangled (collaborator-team DIDs
+ +roles = the ACL/consent-circle vs. pseudonymous subject membership); adopts the
+ proposal's roles (ADMIN/CO_ADMIN/MODERATOR/CURATOR + perms) and binds each to what
+ it gates in D1/D4; `research.project`+`project_member` ACL gates PII exchange/R2/
+ disputes; proposal's aggregate records map onto D4 R1/R2 (no duplication);
+ **stewarded→claim→sovereign lifecycle** (mixed subjects per project); PII durability
+ via consent-circle P2P replication + succession. **Platform track COMPLETE.**
+6. **D6 — Haplogroup-discovery automation spec** *(refresh
+ `haplogroup-discovery-system.md`)* — private-variant ingest→consensus engine. Mostly
+ independent of B; can run in parallel anytime after launch.
+7. **D7 — Multi-test-type confidence** + **D8 — Sequencer-lab inference** — finish
+ the documented subsystems; both feed discovery quality. Parallelizable.
+8. **Deferred:** messaging/reputation refresh, JSONB consolidation, backfeed
+ decision (revisit under D1/D4).
+
+**Two tracks** can run concurrently, joined at **D1 (the shared encrypted-exchange
+substrate)**: **Platform track** D1→D2→D4→D5 (genealogy collaboration) and **Match
+track** D1→D3 (IBD) share the channel; the **Catalog track** D6→D7/D8 (tree-science
+quality) is independent.
+
+## 6. Cross-repo contracts to keep in sync
+
+Bucket B is inherently two-sided. Each net-new AppView doc must pin the
+**Navigator-side contract** already drafted in DUNavigator (`ftdna-project-import.md`
+§8, amended 2026-06-06 to the no-PII / P2P model):
+- **Non-PII** record/NSID shapes (assertions, salted `id_hashes`, aggregate state) →
+ extend `du-domain::fed`; ingested via the existing **Jetstream → du-jobs** path.
+- **PII** payloads (names, MDKA, kit↔subject map, raw STR/SNP) → the **encrypted P2P
+ channel (D1)**, never an AppView record. Navigator runs the Edge endpoint; AppView
+ only brokers + attests.
+- `ResearchSubject` ↔ Navigator `biosample.guid`: AppView stores the **opaque** id +
+ hashes; the clear `external_id(source, id)` stays in Navigator's local store.
+- **Sequencer-lab lookup + consensus (D8) — DONE 2026-06-12 (lookup + engine);
+ curator review UI remains.** Navigator's Rust rewrite **lost** the Scala lab association
+ (FGC/FTDNA/YSEQ/Dante/Nebula…) + read-name platform/instrument inference; it's being
+ restored Navigator-side (read-name scan → `instrument_id`/flowcell/model + a local
+ `labs` catalog). The **AppView lookup endpoint is now built**:
+ **`GET /api/v1/sequencer/lab?instrument_id=…`** (single lookup, 404 if unknown) and
+ **`GET /api/v1/sequencer/lab-instruments`** (bulk cache seed), resolving via the
+ **preseeded** `genomics.sequencer_instrument.lab_id` (mig 0025 re-adds it; the ETL
+ backfills the legacy tie that the 0004 redesign had dropped; `du_db::sequencer`). The
+ lookup uses the preseeded direct tie (memory `sequencer-lab-lookup`). The **consensus
+ engine is now also built**: `du_db::sequencer::recompute_consensus` derives
+ observations from `fed.sequencerun ⋈ fed.biosample.center_name` → per-instrument
+ `instrument_association_proposal` (dominant lab, confidence, threshold status,
+ conflict→PENDING), run by `du-jobs run-once sequencer-consensus` (+ hourly); the
+ curator **accept** (`/manage/instrument-proposals/:id/accept`, audited via
+ `du_db::audit::log`) sets `sequencer_instrument.lab_id` — the column the lookup reads.
+ Navigator publishes `instrument_id`/`center_name` on the `sequencerun`/`biosample` fed
+ records. **Remaining D8:** the curator HTMX review UI (API done), the
+ `instrumentObservation` lexicon, and recency/confidence-level scoring refinements.
+
+## 7. Open strategic questions
+
+1. ~~PII in AppView~~ **RESOLVED (§3): no PII in AppView; PII moves via encrypted
+ P2P (D1).** Remaining sub-question: choose **transport** — direct P2P (NAT
+ traversal, both online) vs **blind relay** (store-and-forward ciphertext); D1
+ decides. Relay is likely needed since admins are rarely online simultaneously.
+2. **Backfeed in or out?** Member-claim likely needs AppView→PDS writes; STATUS
+ lists backfeed as dropped. Reconcile under D1/D4.
+3. **Where do FTDNA branch/clade assignments live** vs the curated AppView
+ haplotree? Project `Sub Group` paths are *project assertions*, not catalog truth
+ — keep them in the assertion store, surface against (not merged into) the tree.
+4. **Consent-flag enforcement** — the FTDNA roster's `Publicly Share DNA Results`
+ must gate federation at the AppView boundary; specify where it's checked.
+5. **Sequencing vs launch** — none of this blocks the cutover; confirm it's all
+ post-launch so it doesn't pull focus from the two launch-critical items.
+
+## 8. Next step
+
+Draft **D1 — Encrypted Edge-to-Edge exchange + AppView broker** (the shared
+foundation for both the genealogy and IBD tracks), pinning the transport choice
+(Q1) and the no-PII data-classification policy. Then **D2 (ResearchSubject, PII-free)**
+and **D3 (IBD impl spec)** build on it.
diff --git a/documents/planning/haplogroup-discovery-system-overview.md b/documents/planning/haplogroup-discovery-system-overview.md
index b1e856ec..d2f01930 100644
--- a/documents/planning/haplogroup-discovery-system-overview.md
+++ b/documents/planning/haplogroup-discovery-system-overview.md
@@ -1,5 +1,14 @@
# Haplogroup Discovery System
+> **ℹ️ Rust status (2026-06-07).** This conceptual overview still holds. Two
+> terminology notes: ingestion is via the outbound **Jetstream summary mirror**
+> (`fed.*`), not a credential-holding "Firehose" (that inbound model was dropped);
+> and **auto-promotion** is a future option — curator accept is the gate today. The
+> curator review/promote half is built; the automated discovery engine is forward
+> work (`design-roadmap-rust-rewrite.md` D6). Full technical design + status:
+> [`haplogroup-discovery-system.md`](./haplogroup-discovery-system.md). Triage:
+> `design-doc-triage-report.md` §7.
+
## What It Does
The Haplogroup Discovery System automatically finds new branches on the Y-DNA and mtDNA family trees by analyzing genetic samples from the community.
diff --git a/documents/planning/haplogroup-discovery-system.md b/documents/planning/haplogroup-discovery-system.md
index 261f8e04..1ca5c121 100644
--- a/documents/planning/haplogroup-discovery-system.md
+++ b/documents/planning/haplogroup-discovery-system.md
@@ -1,5 +1,34 @@
# Haplogroup Discovery System
+> **⚖️ Rust status (2026-06-07).** Prerequisites (variant-schema simplification:
+> universal JSONB coordinates, parallel-mutation handling, JSONB aliases) are
+> **done**. Schema is present (`tree.proposed_branch` + `_evidence` + `_variant`,
+> `tree.biosample_private_variant`, `tree.discovery_config`, `tree.wip_*`), and the
+> **curator review/promote + proposal-pooling half is built** (`du_db::proposal`,
+> `/curator/proposals`, `/manage/curation/proposals` intake).
+>
+> **Architecture evolved:** the Rust model is **Edge-submits** —
+> Navigator extracts the private variants and the citizen publishes them; the AppView
+> pools across submitters. There is **no AppView-side extraction from
+> `HaplogroupResult.mismatchingSnps`** as the pipeline below describes (this aligns
+> with the no-PII / edge-compute direction).
+>
+> **D6 DONE (2026-06-12).** Delivery is a **`com.decodingus.atmosphere.privateVariant`
+> lexicon** record (one per biosample/DNA-arm: terminal + variant calls) mirrored via
+> Jetstream into `fed.private_variant` (mig 0028). The **discovery consensus engine**
+> (`du_db::discovery`, mig 0029) materializes them into `tree.biosample_private_variant`
+> and pools the per-sample variant sets into `tree.proposed_branch` by **variant-set
+> Jaccard** — a declarative, idempotent recompute (stable proposal ids via a
+> `cluster_key` partial-unique index, config thresholds from `tree.discovery_config`,
+> confidence = count + distinct submitters + variant-set consistency,
+> `READY_FOR_REVIEW`/`SPLIT_CANDIDATE` transitions, opt-in auto-promote off by
+> default). Promotion reassigns + freezes the contributing samples
+> (`discovery::reassign_after_promote`). Read API `GET /api/v1/discovery/proposals[/:id]`;
+> the `/curator/proposals` UI surfaces defining variants + confidence + a split banner.
+> The Scala/Slick/Tapir/"Firehose"/`mismatchingSnps`-extraction specifics below are
+> **superseded** — kept for historical context only. Memory `discovery-consensus-engine`;
+> triage `design-doc-triage-report.md` §8.
+
## Executive Summary
This document outlines a comprehensive system for evolving Y-DNA and mtDNA haplogroup trees based on discoveries from **all biosample sources**: both Citizen Biosamples (AT Protocol) and External/Publication Biosamples loaded by curators. The system manages "private branches" (proposed terminal variants), tracks consensus formation across multiple biosamples regardless of source, and provides curator oversight for tree modifications.
@@ -16,7 +45,7 @@ Before implementing the discovery system, the variant schema must be migrated to
2. **Parallel mutation handling** - Same variant name can exist for different lineages
3. **JSONB aliases** - No separate `variant_alias` table
-See: `documents/proposals/variant-schema-simplification.md`
+See: realized in `core.variant` (mig 0002 — universal JSONB coordinates/aliases).
**Key dependency**: The `tree.biosample_private_variant` and `tree.proposed_branch_variant` tables reference the variant table. The new schema changes how variants are identified:
@@ -1448,7 +1477,7 @@ decodingus.discovery {
- [ ] Remove `variant_alias` table and related code
- [ ] Rename `variant_v2` to `variant`
-**See:** `documents/proposals/variant-schema-simplification.md`
+**See:** realized in `core.variant` (mig 0002).
**Risk Mitigation:**
- Dual-write period: write to both old and new schema during transition
@@ -1683,7 +1712,7 @@ Foundation curator tools for manual tree management, independent of the automate
3. **Publication Integration**: Automatically create proposals from new publications
4. **Collaborative Curation**: Multi-curator review workflow with voting
5. **Geographic Correlation**: Analyze proposal evidence by geographic distribution
-6. **DecodingUs Naming Authority**: Establish "DU" prefix for naming discovered variants; publish in format for YBrowse aggregation (see `documents/proposals/variant-schema-simplification.md`)
+6. **DecodingUs Naming Authority**: Establish "DU" prefix for naming discovered variants; publish in format for YBrowse aggregation (see `planning/variant-naming-authority.md`)
7. **Pangenome Coordinates**: Extend variant coordinates JSONB to support graph-based pangenome references as they become available
### Scalability
diff --git a/documents/planning/ibd-matching-system.md b/documents/planning/ibd-matching-system.md
deleted file mode 100644
index 8e28c087..00000000
--- a/documents/planning/ibd-matching-system.md
+++ /dev/null
@@ -1,1381 +0,0 @@
-# IBD Matching and Relationship Discovery System
-
-## Executive Summary
-
-This document outlines a comprehensive system enabling Genetic Genealogists to discover and confirm IBD (Identity By Descent) relationships with other participating users. The system leverages the AT Protocol for decentralized consent management, coordinates with the Java-based Edge Computing Application for secure data exchange, and builds upon existing schema infrastructure (`ibd_discovery_index`, `ibd_pds_attestation`).
-
----
-
-## User Story
-
-> As a **Genetic Genealogist**
-> I need to **be able to perform IBD relationship comparisons with participating Genetic Genealogists**
-> So that I can **discover potential relatives and build my family tree**
-
----
-
-## Problem Statement
-
-Genetic genealogists need to:
-
-1. **Discover potential matches** - Find other users who may share DNA segments indicating common ancestry
-2. **Prioritize comparisons** - Focus on matches likely to be meaningful (shared contacts, similar population breakdowns)
-3. **Request consent** - Ask potential matches for permission to perform detailed IBD analysis
-4. **Exchange data securely** - Share encrypted genetic data for comparison without exposing raw sequences
-5. **Record confirmed relationships** - Persist validated matches for future discovery
-
-### Current Gap
-
-The existing system has:
-- Database schema for IBD discovery (`ibd_discovery_index`, `ibd_pds_attestation`) - **not utilized**
-- Ancestry analysis infrastructure (`ancestry_analysis`, `population`) - **not connected to matching**
-- User/PDS infrastructure - **no consent workflow**
-- No match list concept
-- No Lexicon definitions for match requests or population breakdowns
-
----
-
-## System Architecture
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ User A's Environment │
-├─────────────────────────────────────────────────────────────────────────────┤
-│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
-│ │ Edge App │────▶│ User A's PDS │────▶│ AT Protocol │ │
-│ │ (Java) │ │ │ │ Network │ │
-│ │ │ │ • Biosample │ │ │ │
-│ │ • IBD Analysis │ │ • Match List │ │ • Firehose │ │
-│ │ • Encryption │ │ • Match Requests│ │ • XRPC │ │
-│ │ • Key Exchange │ │ • Population │ │ │ │
-│ └────────┬────────┘ └─────────────────┘ └────────┬────────┘ │
-│ │ │ │
-└───────────┼────────────────────────────────────────────────┼────────────────┘
- │ │
- │ Encrypted P2P Channel │
- │ (Edge App ↔ Edge App) │
- ▼ ▼
-┌───────────┼────────────────────────────────────────────────┼────────────────┐
-│ │ │ │
-│ ┌────────┴────────┐ ┌─────────────────┐ ┌────────┴────────┐ │
-│ │ Edge App │◀────│ User B's PDS │◀────│ AT Protocol │ │
-│ │ (Java) │ │ │ │ Network │ │
-│ │ │ │ • Biosample │ │ │ │
-│ │ • IBD Analysis │ │ • Match List │ │ • Firehose │ │
-│ │ • Encryption │ │ • Match Requests│ │ • XRPC │ │
-│ │ • Key Exchange │ │ • Population │ │ │ │
-│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │
-│ User B's Environment │
-└─────────────────────────────────────────────────────────────────────────────┘
- │
- ▼
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ DecodingUs AppView │
-├─────────────────────────────────────────────────────────────────────────────┤
-│ • Subscribes to Firehose for match confirmations │
-│ • Indexes confirmed matches in ibd_discovery_index │
-│ • Aggregates population data for discovery suggestions │
-│ • Provides match discovery API │
-│ • Tracks attestation consensus │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
----
-
-## Lexicon Extensions
-
-### Namespace: `com.decodingus.atmosphere`
-
-#### 1. Match List Record (`com.decodingus.atmosphere.matchList`)
-
-Stores a user's confirmed genetic matches in their PDS.
-
-**NSID:** `com.decodingus.atmosphere.matchList`
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.matchList",
- "defs": {
- "main": {
- "type": "record",
- "description": "A user's list of confirmed genetic matches stored in their PDS.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["ownerDid", "matches"],
- "properties": {
- "ownerDid": {
- "type": "string",
- "description": "The DID of the user who owns this match list."
- },
- "matches": {
- "type": "array",
- "description": "List of confirmed matches.",
- "items": {
- "type": "ref",
- "ref": "#confirmedMatch"
- }
- },
- "lastUpdated": {
- "type": "string",
- "format": "datetime"
- }
- }
- }
- },
- "confirmedMatch": {
- "type": "object",
- "description": "A confirmed genetic match with another user.",
- "required": ["matchedUserDid", "matchedBiosampleUri", "relationshipType", "confirmedAt"],
- "properties": {
- "matchedUserDid": {
- "type": "string",
- "description": "DID of the matched user."
- },
- "matchedBiosampleUri": {
- "type": "string",
- "description": "AT URI of the matched user's biosample record."
- },
- "relationshipType": {
- "type": "string",
- "description": "Type of genetic relationship.",
- "knownValues": ["AUTOSOMAL", "Y_CHROMOSOME", "MT_DNA", "X_CHROMOSOME"]
- },
- "totalSharedCm": {
- "type": "float",
- "description": "Total shared centimorgans (autosomal)."
- },
- "numSharedSegments": {
- "type": "integer",
- "description": "Number of shared DNA segments."
- },
- "largestSegmentCm": {
- "type": "float",
- "description": "Size of the largest shared segment in cM."
- },
- "estimatedRelationship": {
- "type": "string",
- "description": "Estimated relationship (e.g., '2nd Cousin', '3rd-4th Cousin')."
- },
- "sharedAncestors": {
- "type": "array",
- "description": "Known shared ancestors (if any).",
- "items": { "type": "string" }
- },
- "confirmedAt": {
- "type": "string",
- "format": "datetime"
- },
- "matchSignature": {
- "type": "string",
- "description": "Cryptographic signature confirming both parties agreed to this match."
- },
- "notes": {
- "type": "string",
- "description": "User notes about this match."
- }
- }
- }
- }
-}
-```
-
----
-
-#### 2. Match Request Record (`com.decodingus.atmosphere.matchRequest`)
-
-A request from one user to another for IBD comparison (similar to Bluesky DMs).
-
-**NSID:** `com.decodingus.atmosphere.matchRequest`
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.matchRequest",
- "defs": {
- "main": {
- "type": "record",
- "description": "A request to perform IBD comparison with another user.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["requesterDid", "requesterBiosampleUri", "targetDid", "requestType", "status", "createdAt"],
- "properties": {
- "requesterDid": {
- "type": "string",
- "description": "DID of the user initiating the match request."
- },
- "requesterBiosampleUri": {
- "type": "string",
- "description": "AT URI of the requester's biosample."
- },
- "targetDid": {
- "type": "string",
- "description": "DID of the user being requested for a match."
- },
- "targetBiosampleUri": {
- "type": "string",
- "description": "AT URI of the target's biosample (if known)."
- },
- "requestType": {
- "type": "string",
- "description": "Type of comparison requested.",
- "knownValues": ["AUTOSOMAL", "Y_CHROMOSOME", "MT_DNA", "FULL"]
- },
- "status": {
- "type": "string",
- "description": "Current status of the request.",
- "knownValues": ["PENDING", "ACCEPTED", "REJECTED", "EXPIRED", "COMPLETED", "CANCELLED"]
- },
- "discoveryReason": {
- "type": "ref",
- "ref": "#discoveryReason",
- "description": "Why this match was suggested."
- },
- "message": {
- "type": "string",
- "description": "Optional message from requester explaining interest."
- },
- "createdAt": {
- "type": "string",
- "format": "datetime"
- },
- "expiresAt": {
- "type": "string",
- "format": "datetime",
- "description": "Request expiration (default 30 days)."
- },
- "respondedAt": {
- "type": "string",
- "format": "datetime"
- },
- "responseMessage": {
- "type": "string",
- "description": "Response message from target user."
- }
- }
- }
- },
- "discoveryReason": {
- "type": "object",
- "description": "Reason this match was suggested.",
- "properties": {
- "reasonType": {
- "type": "string",
- "knownValues": ["SHARED_MATCH", "POPULATION_OVERLAP", "HAPLOGROUP_MATCH", "MANUAL"]
- },
- "sharedMatchDids": {
- "type": "array",
- "description": "DIDs of users both parties match with.",
- "items": { "type": "string" }
- },
- "populationOverlapScore": {
- "type": "float",
- "description": "Score indicating population breakdown similarity (0-1)."
- },
- "sharedHaplogroup": {
- "type": "string",
- "description": "Shared terminal haplogroup (Y-DNA or mtDNA)."
- }
- }
- }
- }
-}
-```
-
----
-
-#### 3. Population Breakdown Record (`com.decodingus.atmosphere.populationBreakdown`)
-
-Ancestry composition data stored in the user's PDS.
-
-**NSID:** `com.decodingus.atmosphere.populationBreakdown`
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.populationBreakdown",
- "defs": {
- "main": {
- "type": "record",
- "description": "Ancestry population breakdown for a biosample.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["biosampleUri", "analysisMethod", "populations", "analyzedAt"],
- "properties": {
- "biosampleUri": {
- "type": "string",
- "description": "AT URI of the biosample this breakdown belongs to."
- },
- "analysisMethod": {
- "type": "string",
- "description": "Method/algorithm used for analysis (e.g., 'ADMIXTURE_K12', 'PCA_REFERENCE')."
- },
- "referencePanel": {
- "type": "string",
- "description": "Reference panel used (e.g., 'Human Origins', '1000 Genomes')."
- },
- "populations": {
- "type": "array",
- "description": "Population percentages.",
- "items": {
- "type": "ref",
- "ref": "#populationComponent"
- }
- },
- "analyzedAt": {
- "type": "string",
- "format": "datetime"
- },
- "confidenceLevel": {
- "type": "string",
- "description": "Overall confidence in the breakdown.",
- "knownValues": ["HIGH", "MEDIUM", "LOW"]
- }
- }
- }
- },
- "populationComponent": {
- "type": "object",
- "description": "A single population component in the breakdown.",
- "required": ["populationName", "percentage"],
- "properties": {
- "populationName": {
- "type": "string",
- "description": "Name of the population (e.g., 'Northern European', 'East Asian')."
- },
- "populationCode": {
- "type": "string",
- "description": "Standardized code for the population."
- },
- "percentage": {
- "type": "float",
- "description": "Percentage of ancestry from this population (0-100)."
- },
- "confidenceInterval": {
- "type": "ref",
- "ref": "#confidenceInterval"
- },
- "parentPopulation": {
- "type": "string",
- "description": "Parent population category for hierarchical breakdowns."
- }
- }
- },
- "confidenceInterval": {
- "type": "object",
- "properties": {
- "lower": { "type": "float" },
- "upper": { "type": "float" }
- }
- }
- }
-}
-```
-
----
-
-#### 4. Match Consent Vote Record (`com.decodingus.atmosphere.matchConsent`)
-
-Records a user's consent decision for a match request. Both the requester and the target must record an `ACCEPT` consent for the same match request for a match to be confirmed.
-
-**NSID:** `com.decodingus.atmosphere.matchConsent`
-
-```json
-{
- "lexicon": 1,
- "id": "com.decodingus.atmosphere.matchConsent",
- "defs": {
- "main": {
- "type": "record",
- "description": "A user's consent vote for a match comparison.",
- "key": "tid",
- "record": {
- "type": "object",
- "required": ["matchRequestUri", "voterDid", "vote", "votedAt"],
- "properties": {
- "matchRequestUri": {
- "type": "string",
- "description": "AT URI of the match request this consent applies to."
- },
- "voterDid": {
- "type": "string",
- "description": "DID of the user casting this vote."
- },
- "voterBiosampleUri": {
- "type": "string",
- "description": "AT URI of the voter's biosample."
- },
- "vote": {
- "type": "string",
- "description": "The consent decision.",
- "knownValues": ["ACCEPT", "REJECT", "DEFER"]
- },
- "votedAt": {
- "type": "string",
- "format": "datetime"
- },
- "expiresAt": {
- "type": "string",
- "format": "datetime",
- "description": "When this consent expires (requires renewal)."
- },
- "scope": {
- "type": "array",
- "description": "What data can be shared.",
- "items": {
- "type": "string",
- "knownValues": ["SEGMENT_POSITIONS", "SHARED_CM_TOTAL", "HAPLOGROUP", "POPULATION_OVERLAP"]
- }
- },
- "signature": {
- "type": "string",
- "description": "Cryptographic signature of the consent."
- }
- }
- }
- }
- }
-}
-```
-
----
-
-## Match Discovery Workflow
-
-### Phase 1: Discovery Suggestions
-
-Users can discover potential matches through several mechanisms:
-
-#### 1a. Shared Match Discovery
-
-```
-User A has matches: [M1, M2, M3, M4]
-User B has matches: [M2, M3, M5, M6]
-
-Shared matches: [M2, M3]
-
-If |shared| >= threshold (configurable, default 2):
- → Suggest A and B as potential matches
- → Higher shared count = higher suggestion priority
-```
-
-#### 1b. Population Overlap Discovery
-
-```
-User A population: {Northern European: 45%, British Isles: 30%, Germanic: 15%, ...}
-User B population: {Northern European: 50%, British Isles: 25%, Scandinavian: 15%, ...}
-
-Overlap Score = Σ min(A[pop], B[pop]) for all populations
- = min(45,50) + min(30,25) + ...
- = 45 + 25 + ...
-
-If overlapScore >= threshold (configurable, default 60%, i.e. 0.6 on the 0-1 populationOverlapScore scale):
- → Suggest A and B as potential matches
-```
-
-#### 1c. Haplogroup Match Discovery
-
-```
-User A: Y-DNA R-M269, mtDNA H1a
-User B: Y-DNA R-M269, mtDNA J1c
-
-If A.yHaplogroup == B.yHaplogroup (for males):
- → Suggest Y-DNA comparison
- → Priority based on terminal depth match
-
-If A.mtHaplogroup == B.mtHaplogroup:
- → Suggest mtDNA comparison
-```
-
-### Phase 2: Match Request Flow
-
-```
-┌─────────────┐ ┌─────────────┐
-│ User A │ │ User B │
-│ (Requester)│ │ (Target) │
-└──────┬──────┘ └──────┬──────┘
- │ │
- │ 1. Create matchRequest in A's PDS │
- │────────────────────────────────────────▶│
- │ (status: PENDING) │
- │ │
- │ 2. AT Protocol delivers to B's PDS │
- │ (B sees pending request) │
- │ │
- │ 3. B reviews request │
- │ (sees discovery reason) │
- │ │
- │ 4. B creates matchConsent in B's PDS │
- │◀────────────────────────────────────────│
- │ (vote: ACCEPT) │
- │ │
- │ 5. A creates matchConsent in A's PDS │
- │────────────────────────────────────────▶│
- │ (vote: ACCEPT) │
- │ │
- │ ═══════════════════════════════════════│
- │ Both consents present = Ready for IBD │
- │ ═══════════════════════════════════════│
- │ │
-```
-
-### Phase 3: IBD Analysis (Edge App Coordination)
-
-Once both users consent, the Edge Apps coordinate the actual analysis:
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ EDGE APP COORDINATION PROTOCOL │
-├─────────────────────────────────────────────────────────────────────────────┤
-│ │
-│ 1. KEY EXCHANGE │
-│ ┌──────────────┐ ┌──────────────┐ │
-│ │ Edge App A │───── ECDH Key Exchange ──│ Edge App B │ │
-│ │ │ (via AT Protocol) │ │ │
-│ └──────────────┘ └──────────────┘ │
-│ │
-│ 2. ENCRYPTED DATA EXCHANGE │
-│ • App A encrypts variant positions with shared key │
-│ • App A sends encrypted payload to App B (P2P or relay) │
-│ • App B decrypts and performs local comparison │
-│ • App B encrypts results and sends back │
-│ │
-│ 3. RESULT VERIFICATION │
-│ • Both apps independently calculate shared segments │
-│ • Results are hashed and compared │
-│ • Matching hashes confirm valid analysis │
-│ │
-│ 4. ATTESTATION │
-│ • Both apps sign the match result │
-│ • Attestations written to respective PDS │
-│ • DecodingUs indexes confirmed match │
-│ │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
-### Phase 4: Match Confirmation and Indexing
-
-```
-┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐
-│ User A │ │ User B │ │ DecodingUs AppView │
-│ PDS │ │ PDS │ │ │
-└──────┬──────┘ └──────┬──────┘ └──────────┬──────────┘
- │ │ │
- │ attestation A │ attestation B │
- │───────────────────┼───────────────────────▶
- │ │ │
- │ │ Firehose events │
- │ │ │
- │ │ ┌───────────────────┤
- │ │ │ Verify signatures │
- │ │ │ Match attestations│
- │ │ │ Index in DB │
- │ │ └───────────────────┤
- │ │ │
- │ │ ibd_discovery_index │
- │ │ ibd_pds_attestation │
- │ │ │
-```
-
----
-
-## Database Schema Extensions
-
-### New Tables (in `public` schema, or consider `matching` schema)
-
-```sql
--- Evolution XX: IBD Matching System Extensions
-
--- ============================================================================
--- PART 1: Match Discovery Tables
--- ============================================================================
-
--- Match suggestions generated by the discovery engine
-CREATE TABLE match_suggestion (
- id BIGSERIAL PRIMARY KEY,
- suggester_sample_guid UUID NOT NULL,
- suggested_sample_guid UUID NOT NULL,
- suggestion_type VARCHAR(50) NOT NULL
- CHECK (suggestion_type IN ('SHARED_MATCH', 'POPULATION_OVERLAP', 'HAPLOGROUP_MATCH')),
- score DOUBLE PRECISION NOT NULL,
- metadata JSONB, -- Stores reason details (shared match DIDs, overlap score, etc.)
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- expires_at TIMESTAMP,
- status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE'
- CHECK (status IN ('ACTIVE', 'DISMISSED', 'CONVERTED', 'EXPIRED')),
- UNIQUE(suggester_sample_guid, suggested_sample_guid, suggestion_type)
-);
-
-CREATE INDEX idx_ms_suggester ON match_suggestion(suggester_sample_guid);
-CREATE INDEX idx_ms_suggested ON match_suggestion(suggested_sample_guid);
-CREATE INDEX idx_ms_type ON match_suggestion(suggestion_type);
-CREATE INDEX idx_ms_score ON match_suggestion(score DESC);
-
--- ============================================================================
--- PART 2: Population Overlap Caching
--- ============================================================================
-
--- Cached population breakdowns for efficient overlap calculation
-CREATE TABLE population_breakdown_cache (
- id BIGSERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL UNIQUE,
- citizen_did VARCHAR(255),
- analysis_method VARCHAR(100) NOT NULL,
- breakdown JSONB NOT NULL, -- {populationCode: percentage, ...}
- breakdown_hash VARCHAR(64) NOT NULL, -- For change detection
- source_at_uri VARCHAR(500),
- cached_at TIMESTAMP NOT NULL DEFAULT NOW(),
- expires_at TIMESTAMP
-);
-
-CREATE INDEX idx_pbc_sample ON population_breakdown_cache(sample_guid);
-CREATE INDEX idx_pbc_did ON population_breakdown_cache(citizen_did);
-CREATE INDEX idx_pbc_method ON population_breakdown_cache(analysis_method);
-
--- Pre-computed population overlap scores for discovery
-CREATE TABLE population_overlap_score (
- id BIGSERIAL PRIMARY KEY,
- sample_guid_1 UUID NOT NULL,
- sample_guid_2 UUID NOT NULL,
- overlap_score DOUBLE PRECISION NOT NULL,
- analysis_method VARCHAR(100) NOT NULL,
- computed_at TIMESTAMP NOT NULL DEFAULT NOW(),
- UNIQUE(LEAST(sample_guid_1, sample_guid_2), GREATEST(sample_guid_1, sample_guid_2), analysis_method)
-);
-
-CREATE INDEX idx_pos_sample1 ON population_overlap_score(sample_guid_1);
-CREATE INDEX idx_pos_sample2 ON population_overlap_score(sample_guid_2);
-CREATE INDEX idx_pos_score ON population_overlap_score(overlap_score DESC);
-
--- ============================================================================
--- PART 3: Match Request Tracking
--- ============================================================================
-
--- Local tracking of match requests (supplements PDS records)
-CREATE TABLE match_request_tracking (
- id BIGSERIAL PRIMARY KEY,
- request_at_uri VARCHAR(500) NOT NULL UNIQUE,
- requester_did VARCHAR(255) NOT NULL,
- requester_sample_guid UUID NOT NULL,
- target_did VARCHAR(255) NOT NULL,
- target_sample_guid UUID,
- request_type VARCHAR(50) NOT NULL,
- status VARCHAR(50) NOT NULL,
- discovery_reason JSONB,
- created_at TIMESTAMP NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMP NOT NULL DEFAULT NOW(),
- expires_at TIMESTAMP,
- completed_at TIMESTAMP
-);
-
-CREATE INDEX idx_mrt_requester ON match_request_tracking(requester_did);
-CREATE INDEX idx_mrt_target ON match_request_tracking(target_did);
-CREATE INDEX idx_mrt_status ON match_request_tracking(status);
-
--- Consent votes (local index of PDS consent records)
-CREATE TABLE match_consent_tracking (
- id BIGSERIAL PRIMARY KEY,
- consent_at_uri VARCHAR(500) NOT NULL UNIQUE,
- match_request_at_uri VARCHAR(500) NOT NULL,
- voter_did VARCHAR(255) NOT NULL,
- voter_sample_guid UUID NOT NULL,
- vote VARCHAR(20) NOT NULL CHECK (vote IN ('ACCEPT', 'REJECT', 'DEFER')),
- scope JSONB,
- signature TEXT NOT NULL,
- voted_at TIMESTAMP NOT NULL,
- expires_at TIMESTAMP,
- UNIQUE(match_request_at_uri, voter_did)
-);
-
-CREATE INDEX idx_mct_request ON match_consent_tracking(match_request_at_uri);
-CREATE INDEX idx_mct_voter ON match_consent_tracking(voter_did);
-
--- ============================================================================
--- PART 4: Extend existing IBD tables
--- ============================================================================
-
--- Add match request reference to ibd_discovery_index
-ALTER TABLE ibd_discovery_index
- ADD COLUMN match_request_at_uri VARCHAR(500),
- ADD COLUMN requester_did VARCHAR(255),
- ADD COLUMN target_did VARCHAR(255);
-
-CREATE INDEX idx_ibd_request_uri ON ibd_discovery_index(match_request_at_uri);
-```
-
----
-
-## Service Layer
-
-### 1. MatchDiscoveryService
-
-Generates match suggestions based on various criteria.
-
-```scala
-trait MatchDiscoveryService {
- /**
- * Find potential matches for a user based on shared matches.
- */
- def findSharedMatchSuggestions(
- sampleGuid: UUID,
- minSharedMatches: Int = 2
- ): Future[Seq[MatchSuggestion]]
-
- /**
- * Find potential matches based on population overlap.
- */
- def findPopulationOverlapSuggestions(
- sampleGuid: UUID,
- minOverlapScore: Double = 0.6
- ): Future[Seq[MatchSuggestion]]
-
- /**
- * Find potential matches based on shared haplogroups.
- */
- def findHaplogroupMatchSuggestions(
- sampleGuid: UUID,
- haplogroupType: HaplogroupType
- ): Future[Seq[MatchSuggestion]]
-
- /**
- * Get all suggestions for a user, ranked by score.
- */
- def getSuggestionsForUser(
- userDid: String,
- limit: Int = 50
- ): Future[Seq[RankedMatchSuggestion]]
-
- /**
- * Dismiss a suggestion (user not interested).
- */
- def dismissSuggestion(suggestionId: Long, userDid: String): Future[Boolean]
-}
-```
-
-### 2. MatchRequestService
-
-Manages match request lifecycle.
-
-```scala
-trait MatchRequestService {
- /**
- * Create a new match request.
- * Writes to requester's PDS and tracks locally.
- */
- def createMatchRequest(
- requesterDid: String,
- requesterBiosampleUri: String,
- targetDid: String,
- requestType: MatchRequestType,
- discoveryReason: Option[DiscoveryReason],
- message: Option[String]
- ): Future[MatchRequest]
-
- /**
- * Get pending requests for a user (as target).
- */
- def getPendingRequestsForUser(targetDid: String): Future[Seq[MatchRequest]]
-
- /**
- * Get requests initiated by a user.
- */
- def getRequestsByUser(requesterDid: String): Future[Seq[MatchRequest]]
-
- /**
- * Record a consent vote for a request.
- */
- def recordConsent(
- matchRequestUri: String,
- voterDid: String,
- voterBiosampleUri: String,
- vote: ConsentVote,
- scope: Seq[ConsentScope]
- ): Future[MatchConsent]
-
- /**
- * Check if both parties have consented.
- */
- def checkMutualConsent(matchRequestUri: String): Future[Option[MutualConsent]]
-
- /**
- * Cancel a pending request.
- */
- def cancelRequest(requestUri: String, requesterDid: String): Future[Boolean]
-}
-```
-
-### 3. PopulationAnalysisService
-
-Manages population breakdown data and overlap calculations.
-
-```scala
-trait PopulationAnalysisService {
- /**
- * Cache a population breakdown from PDS.
- */
- def cachePopulationBreakdown(
- sampleGuid: UUID,
- citizenDid: String,
- breakdown: PopulationBreakdown
- ): Future[Unit]
-
- /**
- * Calculate overlap score between two samples.
- */
- def calculateOverlapScore(
- sampleGuid1: UUID,
- sampleGuid2: UUID
- ): Future[Double]
-
- /**
- * Batch compute overlap scores for a sample against all others.
- */
- def computeOverlapScoresForSample(sampleGuid: UUID): Future[Int]
-
- /**
- * Get population breakdown for a sample.
- */
- def getBreakdown(sampleGuid: UUID): Future[Option[PopulationBreakdown]]
-}
-```
-
-### 4. IbdMatchingService
-
-Handles IBD match indexing and querying. **Coordinates with Edge App for actual analysis.**
-
-```scala
-trait IbdMatchingService {
- /**
- * Record a confirmed IBD match from Edge App attestations.
- */
- def recordConfirmedMatch(
- sampleGuid1: UUID,
- sampleGuid2: UUID,
- matchDetails: IbdMatchDetails,
- attestation1: IbdAttestation,
- attestation2: IbdAttestation
- ): Future[IbdDiscoveryIndex]
-
- /**
- * Get all matches for a sample.
- */
- def getMatchesForSample(sampleGuid: UUID): Future[Seq[IbdMatch]]
-
- /**
- * Get match details between two samples.
- */
- def getMatchBetween(sampleGuid1: UUID, sampleGuid2: UUID): Future[Option[IbdMatch]]
-
- /**
- * Verify attestation signatures.
- */
- def verifyAttestations(
- attestation1: IbdAttestation,
- attestation2: IbdAttestation,
- matchHash: String
- ): Future[Boolean]
-
- /**
- * Update consensus status based on attestations.
- */
- def updateConsensusStatus(indexId: Long): Future[ConsensusStatus]
-}
-```
-
----
-
-## Edge App Coordination Points
-
-### Interface Contract: DecodingUs ↔ Edge App
-
-The Edge App (Java) must implement these coordination points:
-
-```
-┌─────────────────────────────────────────────────────────────────────────────┐
-│ EDGE APP INTERFACE CONTRACT │
-├─────────────────────────────────────────────────────────────────────────────┤
-│ │
-│ 1. NOTIFICATION CHANNEL │
-│ ──────────────────── │
-│ DecodingUs → Edge App: Notify of mutual consent │
-│ │
-│ Endpoint: POST /api/edge/v1/match-ready │
-│ Payload: { │
-│ matchRequestUri: String, │
-│ partnerDid: String, │
-│ partnerPdsUrl: String, │
-│ requestType: "AUTOSOMAL" | "Y_CHROMOSOME" | "MT_DNA" | "FULL", │
-│ consentScope: ["SEGMENT_POSITIONS", "SHARED_CM_TOTAL", ...] │
-│ } │
-│ │
-│ 2. KEY EXCHANGE PROTOCOL │
-│ ───────────────────── │
-│ Edge App ↔ Edge App: ECDH key agreement │
-│ │
-│ • Use AT Protocol for key exchange messages │
-│ • Lexicon: com.decodingus.edge.keyExchange │
-│ • Keys rotated per comparison session │
-│ │
-│ 3. DATA EXCHANGE FORMAT │
-│ ───────────────────── │
-│ Edge App A → Edge App B: Encrypted variant data │
-│ │
-│ Format: { │
-│ sessionId: UUID, │
-│ encryptedPayload: Base64, // AES-256-GCM encrypted │
-│ iv: Base64, │
-│ authTag: Base64, │
-│ dataType: "VARIANT_POSITIONS" | "SEGMENT_BOUNDARIES" │
-│ } │
-│ │
-│ 4. RESULT ATTESTATION │
-│ ────────────────── │
-│ Edge App → DecodingUs: Submit match results │
-│ │
-│ Endpoint: POST /api/v1/ibd/attestation │
-│ Payload: { │
-│ matchRequestUri: String, │
-│ attestingDid: String, │
-│ attestingSampleGuid: UUID, │
-│ matchSummary: { │
-│ totalSharedCm: Double, │
-│ numSegments: Int, │
-│ largestSegmentCm: Double, │
-│ regionType: String │
-│ }, │
-│ matchSummaryHash: String, // SHA-256 of canonical summary │
-│ signature: String, // Ed25519 signature with PDS key │
-│ partnerSummaryHash: String // Hash received from partner │
-│ } │
-│ │
-│ 5. SECURITY REQUIREMENTS │
-│ ───────────────────── │
-│ • All data encrypted in transit (TLS 1.3+) │
-│ • All data encrypted at rest on Edge App │
-│ • Variant data never stored on DecodingUs servers │
-│ • Only match summaries (cM, segments) indexed │
-│ • Keys derived from PDS signing keys (verifiable) │
-│ │
-└─────────────────────────────────────────────────────────────────────────────┘
-```
-
-### Edge App Responsibilities
-
-| Responsibility | Description |
-|---------------|-------------|
-| **Variant Storage** | Securely store user's variant calls locally |
-| **Key Management** | Generate/rotate session keys for P2P exchange |
-| **IBD Algorithm** | Implement IBD segment detection algorithm |
-| **Encryption** | Encrypt/decrypt variant data for exchange |
-| **P2P Communication** | Establish direct connection with partner Edge App |
-| **Result Signing** | Sign match results with user's PDS key |
-| **UI/UX** | Present match requests, manage consent workflow |
-
-### DecodingUs Responsibilities
-
-| Responsibility | Description |
-|---------------|-------------|
-| **Discovery Engine** | Generate match suggestions |
-| **Request Routing** | Track match requests across users' PDSes |
-| **Consent Verification** | Verify mutual consent before triggering Edge Apps |
-| **Attestation Indexing** | Index confirmed matches in `ibd_discovery_index` |
-| **Match Querying** | Provide API for match list queries |
-| **Population Caching** | Cache population data for overlap calculations |
-
----
-
-## API Endpoints
-
-### Discovery API
-
-```
-# Match Suggestions
-GET /api/v1/discovery/suggestions
- ?type={SHARED_MATCH|POPULATION_OVERLAP|HAPLOGROUP_MATCH}
- &limit={int}
- → Seq[RankedMatchSuggestion]
-
-POST /api/v1/discovery/suggestions/{id}/dismiss
- → { success: Boolean }
-
-# Population Analysis
-GET /api/v1/discovery/population/{sampleGuid}
- → PopulationBreakdown
-
-GET /api/v1/discovery/population/overlap/{sampleGuid1}/{sampleGuid2}
- → { overlapScore: Double }
-```
-
-### Match Request API
-
-```
-# Match Requests
-POST /api/v1/matches/request
- Body: CreateMatchRequest
- → MatchRequest
-
-GET /api/v1/matches/requests/pending
- → Seq[MatchRequest]
-
-GET /api/v1/matches/requests/sent
- → Seq[MatchRequest]
-
-POST /api/v1/matches/requests/{uri}/cancel
- → { success: Boolean }
-
-# Consent
-POST /api/v1/matches/consent
- Body: CreateMatchConsent
- → MatchConsent
-
-GET /api/v1/matches/consent/status/{requestUri}
- → ConsentStatus
-```
-
-### IBD Results API
-
-```
-# Confirmed Matches
-GET /api/v1/matches/confirmed
- ?regionType={AUTOSOMAL|Y_CHROMOSOME|MT_DNA}
- &minCm={double}
- → Seq[IbdMatch]
-
-GET /api/v1/matches/confirmed/{sampleGuid}
- → Seq[IbdMatch]
-
-# Attestation (called by Edge App)
-POST /api/v1/ibd/attestation
- Body: IbdAttestation
- → { indexed: Boolean, consensusStatus: String }
-```
-
----
-
-## Tapir Endpoint Definitions
-
-```scala
-object MatchDiscoveryEndpoints {
-
- val getSuggestions: Endpoint[String, SuggestionQuery, ApiError, Seq[RankedMatchSuggestion], Any] =
- endpoint.get
- .securityIn(auth.bearer[String]())
- .in("api" / "v1" / "discovery" / "suggestions")
- .in(query[Option[String]]("type"))
- .in(query[Int]("limit").default(50))
- .out(jsonBody[Seq[RankedMatchSuggestion]])
- .errorOut(jsonBody[ApiError])
-
- val getPopulationOverlap: Endpoint[String, (UUID, UUID), ApiError, OverlapResult, Any] =
- endpoint.get
- .securityIn(auth.bearer[String]())
- .in("api" / "v1" / "discovery" / "population" / "overlap")
- .in(path[UUID]("sampleGuid1") / path[UUID]("sampleGuid2"))
- .out(jsonBody[OverlapResult])
- .errorOut(jsonBody[ApiError])
-}
-
-object MatchRequestEndpoints {
-
- val createRequest: Endpoint[String, CreateMatchRequest, ApiError, MatchRequest, Any] =
- endpoint.post
- .securityIn(auth.bearer[String]())
- .in("api" / "v1" / "matches" / "request")
- .in(jsonBody[CreateMatchRequest])
- .out(jsonBody[MatchRequest])
- .errorOut(jsonBody[ApiError])
-
- val recordConsent: Endpoint[String, CreateMatchConsent, ApiError, MatchConsent, Any] =
- endpoint.post
- .securityIn(auth.bearer[String]())
- .in("api" / "v1" / "matches" / "consent")
- .in(jsonBody[CreateMatchConsent])
- .out(jsonBody[MatchConsent])
- .errorOut(jsonBody[ApiError])
-}
-
-object IbdEndpoints {
-
- val submitAttestation: Endpoint[String, IbdAttestationRequest, ApiError, AttestationResult, Any] =
- endpoint.post
- .securityIn(auth.bearer[String]()) // Edge App auth
- .in("api" / "v1" / "ibd" / "attestation")
- .in(jsonBody[IbdAttestationRequest])
- .out(jsonBody[AttestationResult])
- .errorOut(jsonBody[ApiError])
-
- val getConfirmedMatches: Endpoint[String, MatchQuery, ApiError, Seq[IbdMatch], Any] =
- endpoint.get
- .securityIn(auth.bearer[String]())
- .in("api" / "v1" / "matches" / "confirmed")
- .in(query[Option[String]]("regionType"))
- .in(query[Option[Double]]("minCm"))
- .out(jsonBody[Seq[IbdMatch]])
- .errorOut(jsonBody[ApiError])
-}
-```
-
----
-
-## Security Considerations
-
-### Data Privacy
-
-| Data Type | Storage Location | Encryption |
-|-----------|-----------------|------------|
-| Variant calls | Edge App only | AES-256 at rest |
-| Match requests | User PDS | AT Protocol signing |
-| Consent votes | User PDS | AT Protocol signing |
-| Match summaries | DecodingUs DB | Standard DB encryption |
-| Population breakdowns | User PDS + cache | AT Protocol + DB encryption |
-
-### Authentication & Authorization
-
-1. **User Authentication**: OAuth2/DID-based via AT Protocol
-2. **Edge App Authentication**: API keys + request signing
-3. **Consent Verification**: Dual-signature requirement before data exchange
-4. **Rate Limiting**: Prevent discovery enumeration attacks
-
-### Cryptographic Requirements
-
-```
-Key Exchange: ECDH (X25519)
-Data Encryption: AES-256-GCM
-Signatures: Ed25519 (AT Protocol standard)
-Hashing: SHA-256 for match summaries
-```
-
----
-
-## Implementation Phases
-
-### Phase 1: Lexicon & Schema
-
-**Scope:**
-- Define and publish Lexicon extensions
-- Database schema migration
-- Repository layer for new tables
-
-**Deliverables:**
-- [ ] Lexicon JSON files for matchList, matchRequest, populationBreakdown, matchConsent
-- [ ] Database evolution script
-- [ ] `MatchSuggestionRepository`
-- [ ] `PopulationBreakdownCacheRepository`
-- [ ] `MatchRequestTrackingRepository`
-- [ ] `MatchConsentTrackingRepository`
-
-**Edge App Coordination:**
-- Share Lexicon definitions with Edge App team
-- Agree on key exchange protocol
-
-### Phase 2: Discovery Engine
-
-**Scope:**
-- Implement match suggestion algorithms
-- Population overlap calculation
-- Suggestion ranking
-
-**Deliverables:**
-- [ ] `MatchDiscoveryService` implementation
-- [ ] `PopulationAnalysisService` implementation
-- [ ] Background job for overlap score computation
-- [ ] Discovery API endpoints
-
-**Edge App Coordination:**
-- None required (server-side only)
-
-### Phase 3: Request & Consent Flow
-
-**Scope:**
-- Match request lifecycle management
-- Consent voting and verification
-- PDS record creation via AT Protocol
-
-**Deliverables:**
-- [ ] `MatchRequestService` implementation
-- [ ] AT Protocol client for PDS writes
-- [ ] Firehose listener for consent records
-- [ ] Match request API endpoints
-
-**Edge App Coordination:**
-- Coordinate on consent UI/UX
-- Define notification webhook contract
-
-### Phase 4: IBD Integration
-
-**Scope:**
-- Edge App notification on mutual consent
-- Attestation submission endpoint
-- Match indexing and consensus tracking
-
-**Deliverables:**
-- [ ] `IbdMatchingService` implementation
-- [ ] Extend existing `IbdDiscoveryIndexRepository`
-- [ ] Extend existing `IbdPdsAttestationRepository`
-- [ ] IBD API endpoints
-- [ ] Attestation verification logic
-
-**Edge App Coordination:**
-- **CRITICAL**: Define and test data exchange protocol
-- Implement key exchange mechanism
-- Test P2P encrypted communication
-- Verify attestation signing/verification
-
-### Phase 5: UI & Notifications
-
-**Scope:**
-- User-facing match discovery interface
-- Request/consent management UI
-- Match list visualization
-
-**Deliverables:**
-- [ ] Twirl templates for discovery pages
-- [ ] Match request notification system
-- [ ] Match list dashboard
-- [ ] Population breakdown visualization
-
-**Edge App Coordination:**
-- Coordinate on consistent UX across platforms
-- Define deep-linking for match requests
-
----
-
-## Configuration
-
-```hocon
-decodingus.matching {
- discovery {
- shared-match-threshold = 2 # Minimum shared matches for suggestion
- population-overlap-threshold = 0.6 # Minimum overlap score (0-1)
- suggestion-expiry-days = 90
- max-suggestions-per-user = 100
- }
-
- requests {
- default-expiry-days = 30
- max-pending-requests = 50
- consent-expiry-days = 365
- }
-
- ibd {
- attestation-timeout-hours = 24 # Time for both attestations
- min-shared-cm-to-index = 7.0 # Don't index tiny matches
- }
-
- edge-app {
- notification-webhook-timeout = 30.seconds
- retry-attempts = 3
- }
-}
-```
-
----
-
-## Testing Strategy
-
-### Unit Tests
-- Overlap score calculation
-- Suggestion ranking algorithm
-- Consent verification logic
-- Attestation signature verification
-
-### Integration Tests
-- Full request → consent → attestation flow
-- Firehose event processing
-- Edge App webhook delivery
-
-### End-to-End Tests
-- Complete match discovery workflow
-- Cross-PDS consent synchronization
-
-### Edge App Integration Testing
-- **Joint testing required** with Edge App team
-- Key exchange protocol verification
-- Encrypted data round-trip
-- Attestation interoperability
-
----
-
-## Monitoring & Metrics
-
-### Key Metrics
-
-- Suggestions generated per day
-- Request conversion rate (suggestion → request)
-- Consent acceptance rate
-- Average time to mutual consent
-- Attestation success rate
-- Match indexing rate
-
-### Alerts
-
-- Attestation verification failures
-- Consent timeout rate spike
-- Edge App webhook failures
-- Population cache staleness
-
----
-
-## Future Considerations
-
-1. **Group Matching**: Support for family/surname project group comparisons
-2. **Triangulation**: Automated triangulation detection across multiple matches
-3. **Chromosome Browser**: Visual segment comparison (requires Edge App coordination)
-4. **Match Notes Sync**: Synchronize match notes across users
-5. **Relationship Prediction ML**: Machine learning for relationship estimation
-
----
-
-## Appendix: Existing Schema Reference
-
-### ibd_discovery_index (Evolution 7)
-
-```sql
-CREATE TABLE public.ibd_discovery_index (
- id BIGSERIAL PRIMARY KEY,
- sample_guid_1 UUID NOT NULL,
- sample_guid_2 UUID NOT NULL,
- pangenome_graph_id INTEGER NOT NULL,
- match_region_type VARCHAR(50) NOT NULL, -- AUTOSOMAL, Y_CHROMOSOME, etc.
- total_shared_cm_approx DOUBLE PRECISION,
- num_shared_segments_approx INTEGER,
- is_publicly_discoverable BOOLEAN DEFAULT FALSE,
- consensus_status VARCHAR(50) DEFAULT 'INITIATED',
- last_consensus_update TIMESTAMP DEFAULT NOW(),
- validation_service_guid UUID,
- validation_timestamp TIMESTAMP,
- indexed_by_service VARCHAR(255),
- indexed_date TIMESTAMP DEFAULT NOW()
-);
-```
-
-### ibd_pds_attestation (Evolution 7)
-
-```sql
-CREATE TABLE public.ibd_pds_attestation (
- id BIGSERIAL PRIMARY KEY,
- ibd_discovery_index_id BIGINT NOT NULL,
- attesting_pds_guid UUID NOT NULL,
- attesting_sample_guid UUID NOT NULL,
- attestation_timestamp TIMESTAMP DEFAULT NOW(),
- attestation_signature TEXT NOT NULL,
- match_summary_hash VARCHAR(255) NOT NULL,
- attestation_type VARCHAR(50) NOT NULL, -- INITIAL_REPORT, CONFIRMATION, etc.
- attestation_notes TEXT
-);
-```
-
-### ancestry_analysis (Evolution 1)
-
-```sql
-CREATE TABLE ancestry_analysis (
- ancestry_analysis_id SERIAL PRIMARY KEY,
- sample_guid UUID NOT NULL,
- analysis_method_id INT NOT NULL,
- population_id INT NOT NULL,
- probability DECIMAL(5, 4)
-);
-```
diff --git a/documents/planning/jsonb-consolidation-analysis.md b/documents/planning/jsonb-consolidation-analysis.md
deleted file mode 100644
index 1f80f299..00000000
--- a/documents/planning/jsonb-consolidation-analysis.md
+++ /dev/null
@@ -1,553 +0,0 @@
-# JSONB Consolidation Analysis
-
-## Executive Summary
-
-This document analyzes the current database schema to identify tables that would be better served as JSONB columns on their parent tables. The analysis considers query patterns, reporting performance, cardinality relationships, and PostgreSQL JSONB capabilities.
-
-**Key Finding:** 7 tables are strong candidates for JSONB consolidation, potentially eliminating 5-7 tables while improving data locality and reducing JOIN overhead for common access patterns.
-
----
-
-## Evaluation Criteria
-
-Each table was evaluated against:
-
-| Criterion | Favors JSONB | Favors Separate Table |
-|-----------|--------------|----------------------|
-| Cardinality | 1:1 or 1:few | 1:many or many:many |
-| Query pattern | Always with parent | Independent queries |
-| Filtering/JOINs | Never filtered independently | Used in WHERE/JOIN |
-| Aggregations | Never aggregated | SUM/AVG/COUNT queries |
-| Update frequency | Set once, rarely changed | Frequently updated |
-| Data size | Small, bounded | Large, unbounded |
-| Constraints | Simple validation | Complex CHECK/UNIQUE |
-
----
-
-## Reporting Performance Implications
-
-### JSONB Advantages
-- **Reduced JOINs**: Co-located data eliminates JOIN overhead for 1:1 relationships
-- **Better locality**: Related data on same page reduces I/O
-- **Flexible schema**: Easy to add optional fields without migrations
-- **GIN indexing**: Efficient containment and existence queries
-
-### JSONB Disadvantages
-- **Aggregation overhead**: `->>'field'` casting slower than typed columns
-- **Full column updates**: No partial JSONB updates (entire value replaced)
-- **Index size**: GIN indexes larger than B-tree on typed columns
-- **Query complexity**: Path expressions less readable than column names
-
-### Mitigation Strategies
-```sql
--- For frequently aggregated JSONB fields, create expression indexes:
-CREATE INDEX idx_coverage_mean ON alignment_metadata
- USING BTREE ((coverage->>'mean_depth')::double precision);
-
--- For containment queries, use jsonb_path_ops:
-CREATE INDEX idx_checksums ON sequence_file
- USING GIN (checksums jsonb_path_ops);
-```
-
----
-
-## Strong Candidates for Consolidation
-
-### Tier 1: Clear Wins (Low Risk, High Reward)
-
-#### 1. sequence_file_checksum → sequence_file.checksums
-
-| Aspect | Current | Proposed |
-|--------|---------|----------|
-| Relationship | 1:few (1-2 per file) | JSONB array |
-| Access pattern | Always with parent | Same |
-| Independent queries | None | N/A |
-| Rows eliminated | ~50-70% reduction | - |
-
-**Current Schema:**
-```sql
-CREATE TABLE sequence_file_checksum (
- id SERIAL PRIMARY KEY,
- sequence_file_id INT NOT NULL REFERENCES sequence_file(id),
- checksum VARCHAR(255) NOT NULL,
- algorithm VARCHAR(50) NOT NULL,
- verified_at TIMESTAMP NOT NULL,
- UNIQUE (sequence_file_id, algorithm)
-);
-```
-
-**Proposed JSONB:**
-```sql
-ALTER TABLE sequence_file ADD COLUMN checksums JSONB DEFAULT '[]'::jsonb;
-
--- Structure: [{"algorithm": "MD5", "checksum": "abc123...", "verified_at": "2025-01-01T00:00:00Z"}, ...]
--- Constraint moved to application layer
-```
-
-**Migration:**
-```sql
-UPDATE sequence_file sf
-SET checksums = (
- SELECT COALESCE(jsonb_agg(jsonb_build_object(
- 'algorithm', sfc.algorithm,
- 'checksum', sfc.checksum,
- 'verified_at', sfc.verified_at
- )), '[]'::jsonb)
- FROM sequence_file_checksum sfc
- WHERE sfc.sequence_file_id = sf.id
-);
-```
-
----
-
-#### 2. sequence_http_location → sequence_file.http_locations
-
-| Aspect | Current | Proposed |
-|--------|---------|----------|
-| Relationship | 1:few (1-3 per file) | JSONB array |
-| Access pattern | Always with parent | Same |
-| Independent queries | None | N/A |
-| Rows eliminated | ~80% reduction | - |
-
-**Current Schema:**
-```sql
-CREATE TABLE sequence_http_location (
- id SERIAL PRIMARY KEY,
- sequence_file_id INT NOT NULL REFERENCES sequence_file(id),
- file_url TEXT NOT NULL,
- file_index_url TEXT
-);
-```
-
-**Proposed JSONB:**
-```sql
-ALTER TABLE sequence_file ADD COLUMN http_locations JSONB DEFAULT '[]'::jsonb;
-
--- Structure: [{"file_url": "https://...", "file_index_url": "https://..."}, ...]
-```
-
----
-
-#### 3. sequence_atp_location → sequence_file.atp_location
-
-| Aspect | Current | Proposed |
-|--------|---------|----------|
-| Relationship | 1:1 (one per file max) | JSONB object |
-| Access pattern | Always with parent | Same |
-| Independent queries | None | N/A |
-| Table eliminated | Yes | - |
-
-**Current Schema:**
-```sql
-CREATE TABLE sequence_atp_location (
- id SERIAL PRIMARY KEY,
- sequence_file_id INT NOT NULL REFERENCES sequence_file(id),
- repo_did VARCHAR(255) NOT NULL,
- record_cid VARCHAR(255) NOT NULL,
- record_path TEXT NOT NULL,
- index_did VARCHAR(255),
- index_cid VARCHAR(255)
-);
-```
-
-**Proposed JSONB:**
-```sql
-ALTER TABLE sequence_file ADD COLUMN atp_location JSONB;
-
--- Structure: {"repo_did": "did:plc:...", "record_cid": "...", "record_path": "...", ...}
--- NULL when no ATP location
-```
-
----
-
-### Tier 2: Strong Candidates (Medium Effort)
-
-#### 4. alignment_coverage → alignment_metadata.coverage
-
-| Aspect | Current | Proposed |
-|--------|---------|----------|
-| Relationship | 1:1 (strict FK) | JSONB object |
-| Access pattern | Always JOINed | Same |
-| Aggregations | Yes (CoverageBenchmark) | Requires expression indexes |
-| Rows eliminated | 50% | - |
-
-**Current Schema:**
-```sql
-CREATE TABLE alignment_coverage (
- id SERIAL PRIMARY KEY,
- alignment_metadata_id INT NOT NULL UNIQUE REFERENCES alignment_metadata(id),
- mean_depth DOUBLE PRECISION,
- median_depth DOUBLE PRECISION,
- percent_coverage_at_1x DOUBLE PRECISION,
- percent_coverage_at_5x DOUBLE PRECISION,
- percent_coverage_at_10x DOUBLE PRECISION,
- percent_coverage_at_20x DOUBLE PRECISION,
- percent_coverage_at_30x DOUBLE PRECISION,
- bases_no_coverage BIGINT,
- bases_low_quality_mapping BIGINT,
- bases_callable BIGINT,
- mean_mapping_quality DOUBLE PRECISION
-);
-```
-
-**Proposed JSONB:**
-```sql
-ALTER TABLE alignment_metadata ADD COLUMN coverage JSONB;
-
--- Structure: {
--- "mean_depth": 30.5,
--- "median_depth": 29.0,
--- "percent_coverage_at_1x": 0.99,
--- ...
--- }
-```
-
-**Required Index for Aggregations:**
-```sql
--- Support CoverageBenchmark queries
-CREATE INDEX idx_am_coverage_mean_depth
- ON alignment_metadata USING BTREE ((coverage->>'mean_depth')::double precision);
-CREATE INDEX idx_am_coverage_median
- ON alignment_metadata USING BTREE ((coverage->>'median_depth')::double precision);
-```
-
-**Impact:** Requires rewriting 4 aggregation queries in `CoverageBenchmarkRepository`:
-- `getBenchmarksByLab`
-- `getBenchmarksByLabAndTestType`
-- `getBenchmarksByContig`
-- `getOverallBenchmarks`
-
----
-
-#### 5. citizen_biosample_original_haplogroup → citizen_biosample.original_haplogroups
-
-| Aspect | Current | Proposed |
-|--------|---------|----------|
-| Relationship | 1:few (per publication) | JSONB array |
-| Access pattern | With parent | Same |
-| Constraint | UNIQUE(biosample_id, publication_id) | Application-level |
-| Existing JSONB | y_haplogroup, mt_haplogroup already JSONB | Consistent pattern |
-
-**Current Schema:**
-```sql
-CREATE TABLE citizen_biosample_original_haplogroup (
- id SERIAL PRIMARY KEY,
- citizen_biosample_id INT NOT NULL REFERENCES citizen_biosample(id),
- publication_id INT NOT NULL REFERENCES publication(id),
- y_haplogroup_result JSONB,
- mt_haplogroup_result JSONB,
- notes TEXT,
- UNIQUE(citizen_biosample_id, publication_id)
-);
-```
-
-**Proposed JSONB:**
-```sql
-ALTER TABLE citizen_biosample
- ADD COLUMN original_haplogroups_by_publication JSONB DEFAULT '[]'::jsonb;
-
--- Structure: [
--- {
--- "publication_id": 123,
--- "y_haplogroup_result": {...},
--- "mt_haplogroup_result": {...},
--- "notes": "..."
--- },
--- ...
--- ]
-```
-
-**Uniqueness Enforcement:**
-```scala
-// Application-level validation
-def addOriginalHaplogroup(biosample: CitizenBiosample, pubId: Int, data: HaplogroupData): Future[CitizenBiosample] = {
- val existing = biosample.originalHaplogroupsByPublication.getOrElse(Seq.empty)
- if (existing.exists(_.publicationId == pubId)) {
- Future.failed(DuplicatePublicationHaplogroupError(pubId))
- } else {
- val updated = existing :+ OriginalHaplogroup(pubId, data.y, data.mt, data.notes)
- biosampleRepository.update(biosample.copy(originalHaplogroupsByPublication = Some(updated)))
- }
-}
-```
-
----
-
-#### 6. biosample_original_haplogroup → biosample.original_haplogroups
-
-Same pattern as #5, for publication (external) biosamples.
-
-**Current Schema:**
-```sql
-CREATE TABLE biosample_original_haplogroup (
- id SERIAL PRIMARY KEY,
- biosample_id INT NOT NULL REFERENCES biosample(id),
- publication_id INT NOT NULL REFERENCES publication(id),
- y_haplogroup_result JSONB,
- mt_haplogroup_result JSONB,
- notes TEXT,
- UNIQUE(biosample_id, publication_id)
-);
-```
-
-**Proposed:** Same JSONB array pattern on `biosample` table.
-
----
-
-### Tier 3: Conditional Candidates (Trade-offs)
-
-#### 7. Revision Metadata Tables (haplogroup_variant_metadata, relationship_revision_metadata)
-
-| Aspect | Current | Consideration |
-|--------|---------|---------------|
-| Relationship | 1:many (revision history) | JSONB array for history |
-| Access pattern | Recursive chain queries | Would need app-level recursion |
-| Use case | Audit trail | Append-only log fits JSONB |
-
-**Recommendation:** Hybrid approach - keep current table for active revisions, add JSONB column for historical snapshot:
-
-```sql
--- Add historical log to parent tables
-ALTER TABLE haplogroup_variant ADD COLUMN revision_history JSONB DEFAULT '[]'::jsonb;
-ALTER TABLE haplogroup_relationship ADD COLUMN revision_history JSONB DEFAULT '[]'::jsonb;
-
--- Structure: [
--- {"revision_id": 1, "author": "...", "timestamp": "...", "change_type": "CREATE", "comment": "..."},
--- {"revision_id": 2, "author": "...", "timestamp": "...", "change_type": "UPDATE", "comment": "..."}
--- ]
-```
-
-**Trade-off:** Faster history reads, but complex recursive queries (getVariantRevisionChain) would need application logic.
-
----
-
-## Do NOT Consolidate
-
-### Many-to-Many Junction Tables
-- `publication_biosample`
-- `publication_citizen_biosample`
-- `publication_ena_study`
-
-**Reason:** Junction tables with two FKs are the correct pattern for M:N relationships. JSONB arrays on both sides would require complex sync logic.
-
-### Core Entity Tables
-- `biosample`, `citizen_biosample`, `specimen_donor`
-- `haplogroup`, `variant`, `haplogroup_variant`
-- `publication`, `genomic_studies`
-
-**Reason:** Independently queried entities with complex filtering, constraints, and relationships.
-
-### Graph/Tree Structures
-- `pangenome_graph`, `pangenome_node`, `pangenome_edge`, `pangenome_path`
-- `haplogroup_relationship` (tree structure)
-
-**Reason:** Graph algorithms require efficient traversal; JSONB would complicate recursive queries.
-
-### High-Volume Data Tables
-- `reported_variant_pangenome`
-- `quality_metrics`
-- `ibd_discovery_index`
-
-**Reason:** High cardinality, independent queries, aggregation targets.
-
----
-
-## Implementation Plan
-
-### Phase 1: Sequence File Consolidation (Low Risk) [X] Completed
-
-**Evolution XX:**
-```sql
--- !Ups
-
--- 1. Add JSONB columns
-ALTER TABLE sequence_file ADD COLUMN checksums JSONB DEFAULT '[]'::jsonb;
-ALTER TABLE sequence_file ADD COLUMN http_locations JSONB DEFAULT '[]'::jsonb;
-ALTER TABLE sequence_file ADD COLUMN atp_location JSONB;
-
--- 2. Migrate data
-UPDATE sequence_file sf SET checksums = (
- SELECT COALESCE(jsonb_agg(jsonb_build_object(
- 'algorithm', sfc.algorithm,
- 'checksum', sfc.checksum,
- 'verified_at', sfc.verified_at
- ) ORDER BY sfc.algorithm), '[]'::jsonb)
- FROM sequence_file_checksum sfc WHERE sfc.sequence_file_id = sf.id
-);
-
-UPDATE sequence_file sf SET http_locations = (
- SELECT COALESCE(jsonb_agg(jsonb_build_object(
- 'file_url', shl.file_url,
- 'file_index_url', shl.file_index_url
- )), '[]'::jsonb)
- FROM sequence_http_location shl WHERE shl.sequence_file_id = sf.id
-);
-
-UPDATE sequence_file sf SET atp_location = (
- SELECT jsonb_build_object(
- 'repo_did', sal.repo_did,
- 'record_cid', sal.record_cid,
- 'record_path', sal.record_path,
- 'index_did', sal.index_did,
- 'index_cid', sal.index_cid
- )
- FROM sequence_atp_location sal WHERE sal.sequence_file_id = sf.id
-);
-
--- 3. Create indexes
-CREATE INDEX idx_sf_checksums ON sequence_file USING GIN (checksums jsonb_path_ops);
-
--- 4. Drop old tables
-DROP TABLE sequence_file_checksum;
-DROP TABLE sequence_http_location;
-DROP TABLE sequence_atp_location;
-
--- !Downs
--- (reverse migration SQL)
-```
-
-**Code Changes:**
-- Update `SequenceFile` domain model
-- Remove `SequenceFileChecksum`, `SequenceHttpLocation`, `SequenceAtpLocation` models
-- Update `SequenceFileRepository` to handle JSONB
-- Update `BiosampleDataService` file creation logic
-
----
-
-### Phase 2: Alignment Coverage Consolidation (Medium Risk)
-
-**Evolution XX+1:**
-```sql
--- !Ups
-
-ALTER TABLE alignment_metadata ADD COLUMN coverage JSONB;
-
-UPDATE alignment_metadata am SET coverage = (
- SELECT jsonb_build_object(
- 'mean_depth', ac.mean_depth,
- 'median_depth', ac.median_depth,
- 'percent_coverage_at_1x', ac.percent_coverage_at_1x,
- 'percent_coverage_at_5x', ac.percent_coverage_at_5x,
- 'percent_coverage_at_10x', ac.percent_coverage_at_10x,
- 'percent_coverage_at_20x', ac.percent_coverage_at_20x,
- 'percent_coverage_at_30x', ac.percent_coverage_at_30x,
- 'bases_no_coverage', ac.bases_no_coverage,
- 'bases_low_quality_mapping', ac.bases_low_quality_mapping,
- 'bases_callable', ac.bases_callable,
- 'mean_mapping_quality', ac.mean_mapping_quality
- )
- FROM alignment_coverage ac WHERE ac.alignment_metadata_id = am.id
-);
-
--- Indexes for aggregation queries
-CREATE INDEX idx_am_cov_mean_depth ON alignment_metadata
- USING BTREE (((coverage->>'mean_depth')::double precision));
-CREATE INDEX idx_am_cov_pct_30x ON alignment_metadata
- USING BTREE (((coverage->>'percent_coverage_at_30x')::double precision));
-
-DROP TABLE alignment_coverage;
-```
-
-**Code Changes:**
-- Update `AlignmentMetadata` model to include `AlignmentCoverage` as embedded case class
-- Rewrite `CoverageBenchmarkRepository` aggregation queries
-- Update `AlignmentRepository` to handle embedded coverage
-
-**Query Migration Example:**
-```sql
--- Before (with JOIN)
-SELECT sl.lab, sl.test_type, AVG(ac.mean_depth)
-FROM sequence_library sl
-JOIN sequence_file sf ON sf.library_id = sl.id
-JOIN alignment_metadata am ON am.sequence_file_id = sf.id
-JOIN alignment_coverage ac ON ac.alignment_metadata_id = am.id
-GROUP BY sl.lab, sl.test_type;
-
--- After (JSONB)
-SELECT sl.lab, sl.test_type, AVG((am.coverage->>'mean_depth')::double precision)
-FROM sequence_library sl
-JOIN sequence_file sf ON sf.library_id = sl.id
-JOIN alignment_metadata am ON am.sequence_file_id = sf.id
-WHERE am.coverage IS NOT NULL
-GROUP BY sl.lab, sl.test_type;
-```
-
----
-
-### Phase 3: Haplogroup Tracking Consolidation (Medium Risk)
-
-**Evolution XX+2:**
-```sql
--- !Ups
-
-ALTER TABLE citizen_biosample
- ADD COLUMN original_haplogroups_by_publication JSONB DEFAULT '[]'::jsonb;
-
-ALTER TABLE biosample
- ADD COLUMN original_haplogroups_by_publication JSONB DEFAULT '[]'::jsonb;
-
--- Migrate citizen_biosample data
-UPDATE citizen_biosample cb SET original_haplogroups_by_publication = (
- SELECT COALESCE(jsonb_agg(jsonb_build_object(
- 'publication_id', cboh.publication_id,
- 'y_haplogroup_result', cboh.y_haplogroup_result,
- 'mt_haplogroup_result', cboh.mt_haplogroup_result,
- 'notes', cboh.notes
- )), '[]'::jsonb)
- FROM citizen_biosample_original_haplogroup cboh
- WHERE cboh.citizen_biosample_id = cb.id
-);
-
--- Migrate biosample data
-UPDATE biosample b SET original_haplogroups_by_publication = (
- SELECT COALESCE(jsonb_agg(jsonb_build_object(
- 'publication_id', boh.publication_id,
- 'y_haplogroup_result', boh.y_haplogroup_result,
- 'mt_haplogroup_result', boh.mt_haplogroup_result,
- 'notes', boh.notes
- )), '[]'::jsonb)
- FROM biosample_original_haplogroup boh
- WHERE boh.biosample_id = b.id
-);
-
--- Index for publication lookups
-CREATE INDEX idx_cb_orig_hg_pub ON citizen_biosample
- USING GIN (original_haplogroups_by_publication jsonb_path_ops);
-CREATE INDEX idx_b_orig_hg_pub ON biosample
- USING GIN (original_haplogroups_by_publication jsonb_path_ops);
-
-DROP TABLE citizen_biosample_original_haplogroup;
-DROP TABLE biosample_original_haplogroup;
-```
-
----
-
-## Summary
-
-### Tables to Consolidate (7 total)
-
-| Current Table | Target Parent | Column Type | Priority |
-|--------------|---------------|-------------|----------|
-| sequence_file_checksum | sequence_file | JSONB array | P1 |
-| sequence_http_location | sequence_file | JSONB array | P1 |
-| sequence_atp_location | sequence_file | JSONB object | P1 |
-| alignment_coverage | alignment_metadata | JSONB object | P2 |
-| pangenome_alignment_coverage | pangenome_alignment_metadata | JSONB object | P2 |
-| citizen_biosample_original_haplogroup | citizen_biosample | JSONB array | P3 |
-| biosample_original_haplogroup | biosample | JSONB array | P3 |
-
-### Expected Outcomes
-
-- **Tables eliminated:** 7
-- **JOIN reduction:** 3-4 fewer JOINs per sequence file query
-- **Code simplification:** Fewer repository classes, simpler data access
-- **Performance:** Faster reads for 1:1/1:few relationships; requires monitoring for aggregations
-
-### Risks & Mitigations
-
-| Risk | Mitigation |
-|------|------------|
-| Aggregation slowdown | Expression indexes on frequently aggregated fields |
-| Lost constraints | Application-level validation |
-| Migration errors | Staged rollout with verification queries |
-| Reporting impact | Benchmark before/after with production-like data |
diff --git a/documents/planning/multi-test-type-roadmap.md b/documents/planning/multi-test-type-roadmap.md
index 7fc76129..f9f6b152 100644
--- a/documents/planning/multi-test-type-roadmap.md
+++ b/documents/planning/multi-test-type-roadmap.md
@@ -1,5 +1,49 @@
# Multi-Test-Type Support Roadmap
+> **⚖️ This doc conflates AppView and Navigator concerns (2026-06-12).** Most of it —
+> per-test-type taxonomy/tracking, chip parsing, marker-coverage, file formats, the
+> `genotyping_test_summary`/accuracy-tier machinery — is **Navigator's** (the Edge
+> tracks data *by test*). The **AppView only cares that the calls are reliable enough
+> to build the shared genealogy components** (tree, IBD, reports). That reliability
+> has exactly two inputs:
+> 1. **Coverage conformance** — is a run's depth in line with the norm? **DONE** (D7
+> below).
+> 2. **Cross-technology consensus** — the per-biosample call reconciled across all
+> its sequencing technologies (`fed.haplogroup_reconciliation`: consensus_haplogroup
+> + confidence + snp_concordance + run_count). **In the AppView the *consensus*
+> drives tree evolution + reporting, never the individual runs.** The reconciliation
+> record is mirrored but **not yet wired in** — the remaining AppView piece.
+>
+> Cross-test-type IBD (Phase 6) is the separate D1/D3 Edge-to-Edge track. The
+> haplogroup-marker-coverage / accuracy-tier machinery (Phases 4–5) is Navigator's.
+>
+> **⚖️ D7 coverage QA DONE (2026-06-12).** Built:
+> `genomics.test_type_coverage_norm` (mig 0030) — the **empirically-derived** cohort
+> norm per test type (median/quartile depth, pct tiers, typical Y/mt marker counts),
+> recomputed from `fed.coverage_summary ⋈ fed.sequencerun` + `fed.genotype` by
+> `du_db::coverage::recompute_norms` (`du-jobs run-once coverage-norms` + hourly);
+> per-sample **conformance** on the report (actual depth vs cohort norm + advertised
+> spec → BELOW/AT/ABOVE), baselined on the empirical norm because an advertised "30×
+> WGS" is a ~90 Gb raw-yield spec and D2C labs don't target 30× aligned; vendor
+> conformance on `coverage::benchmarks` (`meets_spec`/`depth_delta`); read API
+> `GET /api/v1/test-types[/:code]`. Deferred: age-contribution wiring (typical SNP
+> counts captured), raw-yield (Gbases) norm. Memory `test-type-coverage-norms`.
+> The Phase-1 schema notes below remain accurate for `test_type_definition` /
+> `coverage_expectation_profile`.
+>
+> **⚖️ Rust status (2026-06-07).** **Phase-1 schema is built**, leaner than below:
+> `genomics.test_type_definition` (omits `expected_target_depth`,
+> `expected_marker_count`, `version`, `release_date`, `deprecated_at`,
+> `successor_test_type_id`, `documentation_url`), coverage thresholds in a separate
+> `genomics.coverage_expectation_profile`, and a **native `sequence_library.test_type_id`
+> FK** (no string column to migrate); `du-domain` `DataGenerationMethod` /
+> `TargetType` enums exist. **Seed data is not yet loaded.** Phases 2–6 (target
+> regions, `genotyping_test_summary`, marker-coverage reference, test-type-aware
+> confidence, cross-test-type IBD) are **forward work = `design-roadmap-rust-rewrite.md`
+> D7**. Read the Scala/Slick/Tapir/Pekko + removed `/api/private` specifics as
+> illustrative — restate in Rust (axum/utoipa; the Jetstream mirror) when built.
+> Triage: `design-doc-triage-report.md` §5.
+
## Executive Summary
This document outlines the roadmap for extending DecodingUs beyond Whole Genome Sequencing (WGS) to support:
@@ -18,8 +62,8 @@ This roadmap integrates with other planning documents:
| Document | Relationship |
|----------|-------------|
| `haplogroup-discovery-system.md` | **Primary integration point.** Y/mtDNA variants from all test types feed into the discovery system for tree building. This roadmap's chip and targeted sequencing services delegate to the discovery system's `PrivateVariantExtractionService`. |
-| `ibd-matching-system.md` | IBD comparisons happen Edge-to-Edge using autosomal data. This roadmap's test type metadata helps determine comparison compatibility. |
-| `jsonb-consolidation-analysis.md` | Some tables in this roadmap may be candidates for JSONB consolidation. |
+| `d3-ibd-matching-impl.md` (on `d1-encrypted-edge-exchange.md`) | IBD comparisons happen Edge-to-Edge using autosomal data. This roadmap's test type metadata helps determine comparison compatibility. |
+| JSONB consolidation (realized, mig 0002/0004) | The 1:1/1:few tables here were folded into JSONB on their parents in the redesign. |
**Schema Note:** All haplogroup-related tables reside in the `tree` schema as defined in `haplogroup-discovery-system.md`. This includes:
- `tree.haplogroup`, `tree.haplogroup_variant`, `tree.haplogroup_relationship`
@@ -1261,7 +1305,7 @@ class ParserFtdna extends ChipDataParser { ... }
1. **Reference Data Download**: Edge App fetches marker coverage reference from DecodingUs to know which Y/mtDNA SNPs to extract
2. **Metadata Registration**: Edge App submits `GenotypingTestSummary` after local processing
3. **Haplogroup Variant Submission**: Edge App submits Y/mtDNA variants for tree building
-4. **IBD Coordination**: Autosomal comparisons happen Edge-to-Edge per `ibd-matching-system.md`
+4. **IBD Coordination**: Autosomal comparisons happen Edge-to-Edge per `d3-ibd-matching-impl.md`
---
diff --git a/documents/planning/open-code-notes.md b/documents/planning/open-code-notes.md
new file mode 100644
index 00000000..f05443d0
--- /dev/null
+++ b/documents/planning/open-code-notes.md
@@ -0,0 +1,30 @@
+# Tracked: open in-code notes (TODO / transitional)
+
+Created 2026-06-10. A small backlog of the two deliberate forward-looking notes left
+in the Rust source, surfaced by a TODO/hack sweep. Neither is a bug; both are
+intentional and scoped. They are tracked here so they don't get lost in code comments.
+
+## 1. Jobs: variant-export to a file artifact
+
+- **Where:** `rust/crates/du-jobs/src/main.rs` (`TODO(jobs)`)
+- **What:** add a scheduled job that exports the variant catalog to a file artifact.
+- **Context:** the live path already exists — `GET /api/v1/variants/export` streams CSV
+ on demand. This would be the batch/artifact equivalent (e.g. a periodic dump for
+ downstream consumers). Match-discovery is explicitly **out of scope** (IBD is not in
+ production — see the AppView-coordinator track in `collab-platform-d1-d5`).
+- **Priority:** low. No consumer is blocked; the live endpoint covers current needs.
+
+## 2. Curation intake auth: X-API-Key → OAuth bearer
+
+- **Where:** `rust/crates/du-web/src/routes/curation.rs` (module doc)
+- **What:** the Navigator → curation intake endpoint authenticates with a static
+ `X-API-Key` today; it should switch to an OAuth bearer token once the Edge handshake is live.
+- **Context:** gated on the encrypted Edge-exchange substrate — see
+ `documents/planning/d1-encrypted-edge-exchange.md`. Until that handshake exists, the
+ API key is the machine-auth stopgap.
+- **Priority:** sequenced after D1. Functional and acceptable in the interim.
+
+---
+
+*If/when `gh` is authenticated, these can be promoted to GitHub issues; for now they
+follow the repo's `documents/planning/` issue convention.*
diff --git a/documents/planning/openalex-publication-discovery.md b/documents/planning/openalex-publication-discovery.md
index 67bcb8a9..14a1ad87 100644
--- a/documents/planning/openalex-publication-discovery.md
+++ b/documents/planning/openalex-publication-discovery.md
@@ -1,5 +1,17 @@
# OpenAlex Publication Auto-Discovery System
+> **📝 Rust status (2026-06-07).** Substantially built: scheduled
+> `publication-discovery` + `publication-update` jobs (tokio, **daily** — not a
+> weekly cron), candidates in `pubs.publication_candidate` (dedupe by `openalex_id`),
+> and the curator queue `/curator/publications` (accept→promote / reject / defer) —
+> so Phase-1's "simple curator review UI" (shown `[ ]` below) is **done**. Added
+> beyond the doc: the public on-ramp `/references/submit` (DOI → candidate).
+> **Forward:** relevance scoring (`relevance_score` exists but isn't computed), smart
+> discovery, biosample-extraction hints, and the `publication_search_run` table.
+> Swap the Scala specifics (`OpenAlexService`/Pekko/`/api/private/*`/`SERIAL`/plural
+> tables) for `du_external::openalex` + `du_db::publication` +
+> `pubs.publication_candidate`. Triage: `design-doc-triage-report.md` §3.
+
## Overview
Automatically discover and surface relevant genomic publications using OpenAlex, reducing curator burden and ensuring the platform stays current with academic research.
diff --git a/documents/planning/post-mvp-roadmap.md b/documents/planning/post-mvp-roadmap.md
index c3918148..d21e6367 100644
--- a/documents/planning/post-mvp-roadmap.md
+++ b/documents/planning/post-mvp-roadmap.md
@@ -1,5 +1,16 @@
# Post-MVP Feature Roadmap
+> **📝 Superseded for sequencing (2026-06-07).** The authoritative roadmap is now
+> [`design-roadmap-rust-rewrite.md`](./design-roadmap-rust-rewrite.md) — the gap
+> catalog, the two-track **D1–D8** plan, and the no-PII reconciliation. This doc
+> **predates and omits the entire collaboration/IBD-via-D1 platform (D1–D5)** and
+> still lists IBD as a standalone system (now superseded by D1 + D3). Status
+> drift below: tree versioning, the curator proposal/review half, and the multi-test
+> + sequencer-lab **schema** are built beyond what's checked; OpenAlex candidate
+> queue and the JSONB consolidation are done. Use the design-roadmap for current
+> sequencing; keep this for the per-phase detail + success metrics. Triage:
+> `design-doc-triage-report.md` §11.
+
## Overview
This document serves as the central planning reference for features targeted after MVP completion. Each feature has a detailed planning document; this roadmap provides the high-level view, dependencies, and sequencing.
@@ -11,9 +22,10 @@ This document serves as the central planning reference for features targeted aft
| Haplogroup Discovery System | [`haplogroup-discovery-system.md`](./haplogroup-discovery-system.md) | Planned |
| Sequencer Lab Inference | [`sequencer-lab-inference-system.md`](./sequencer-lab-inference-system.md) | Planned |
| Multi-Test-Type Support | [`multi-test-type-roadmap.md`](./multi-test-type-roadmap.md) | Planned |
-| IBD Matching System | [`ibd-matching-system.md`](./ibd-matching-system.md) | Planned |
+| IBD Matching System | superseded → [`d1-encrypted-edge-exchange.md`](./d1-encrypted-edge-exchange.md) + [`d3-ibd-matching-impl.md`](./d3-ibd-matching-impl.md) | Planned |
| OpenAlex Publication Discovery | [`openalex-publication-discovery.md`](./openalex-publication-discovery.md) | In Progress |
-| JSONB Consolidation | [`jsonb-consolidation-analysis.md`](./jsonb-consolidation-analysis.md) | Technical Debt |
+| Tree-endpoint cache revalidation (ETag/version) | [`y-tree-cache-revalidation.md`](./y-tree-cache-revalidation.md) | Done (2026-06-12) |
+| JSONB Consolidation | realized in the Rust redesign (mig 0002/0004) | Done |
### Non-Technical Summary
@@ -351,7 +363,7 @@ The JSONB consolidation work is distributed across feature phases to minimize di
- [Haplogroup Discovery Overview](./haplogroup-discovery-system-overview.md) - Non-technical summary
- [Sequencer Lab Inference](./sequencer-lab-inference-system.md) - Full technical design
- [Multi-Test-Type Roadmap](./multi-test-type-roadmap.md) - Full technical design
-- [IBD Matching System](./ibd-matching-system.md) - Full technical design
+- IBD Matching System — superseded; see [`d1-encrypted-edge-exchange.md`](./d1-encrypted-edge-exchange.md) + [`d3-ibd-matching-impl.md`](./d3-ibd-matching-impl.md)
- [OpenAlex Publication Discovery](./openalex-publication-discovery.md) - Automated literature discovery
-- [JSONB Consolidation Analysis](./jsonb-consolidation-analysis.md) - Technical debt analysis
+- JSONB Consolidation — realized in the Rust redesign (migrations 0002/0004)
- [Atmosphere Lexicon](../Atmosphere_Lexicon.md) - AT Protocol record definitions
\ No newline at end of file
diff --git a/documents/planning/sequencer-lab-inference-system.md b/documents/planning/sequencer-lab-inference-system.md
index 2c188d6c..7e0d33db 100644
--- a/documents/planning/sequencer-lab-inference-system.md
+++ b/documents/planning/sequencer-lab-inference-system.md
@@ -1,5 +1,41 @@
# Sequencer Lab Inference System
+> **⚖️ Rust status (2026-06-07).** The **full schema is built — including the
+> consensus tables this doc marks NEW**: `genomics.sequencing_lab`,
+> `genomics.sequencer_instrument`, `genomics.instrument_observation`, and
+> `genomics.instrument_association_proposal` (mig 0004). Deltas: `genomics` schema +
+> `BIGINT IDENTITY`; `sequencer_instrument` has **no `lab_id` FK** (and adds
+> `model_name`/`manufacturer`/`year_introduced`/`estimated_max_throughput`) —
+> instrument↔lab resolves via observation→proposal→accept, not a static FK; the
+> proposed instrument confidence columns live in the proposal table instead.
+> **DONE (2026-06-12): lookup API, consensus engine (production-hardened), curator
+> HTMX review UI, AND the `instrumentObservation` lexicon end-to-end** — citizens
+> publish `com.decodingus.atmosphere.instrumentObservation` records (real confidence
+> level KNOWN/INFERRED/GUESSED + `observedAt`); the Jetstream consumer mirrors them
+> into `fed.instrument_observation` (mig 0027), and `recompute_consensus` folds them
+> in alongside the implicit `centerName` claims with **real confidence-level +
+> recency scoring** (the score's `recency`/`level` terms are no longer constants).
+> Nothing material remains; future items are the "Future Considerations" list below.
+> The consensus engine
+> (`du_db::sequencer::recompute_consensus`) derives observations from
+> `fed.sequencerun ⋈ fed.biosample.center_name` into `genomics.instrument_observation`,
+> aggregates per instrument into `instrument_association_proposal` (dominant lab,
+> distinct-citizen counts, confidence, threshold status, conflict held at PENDING),
+> and a curator **accept** (`/manage/instrument-proposals/:id/accept`) sets
+> `sequencer_instrument.lab_id` — the same column the lookup resolves. Run by
+> `du-jobs run-once sequencer-consensus` (+ hourly). Accept/reject are audited to
+> `ident.audit_log`. Auto-accept is opt-in (off by default — curator-gated). The
+> lookup serves the **preseeded direct** instrument→lab tie:
+> mig 0025 re-adds a nullable `genomics.sequencer_instrument.lab_id`, the ETL
+> backfills it from the legacy `lab_id`, and `du_db::sequencer::{lookup_lab,
+> lab_instruments}` resolves through it. Endpoints:
+> `GET /api/v1/sequencer/lab?instrument_id=…` (→ `SequencerLabDto`, 404 if unknown) and
+> `GET /api/v1/sequencer/lab-instruments` (bulk cache seed). *(Earlier wording, superseded by
+> the 2026-06-12 DONE entry above: the proposal tables were dormant, accepting a proposal was
+> to set `lab_id` once consensus went live, the planned consensus source was `fed.sequencerun.instrument_id`,
+> and the `instrumentObservation` lexicon + its `fed.*` mirror were still undefined.)* The "Current State / existing API endpoints /
+> domain models" below are **Scala-era**. Triage: `design-doc-triage-report.md` §6.
+
## Executive Summary
This document outlines enhancements to the existing sequencer lab lookup system to support:
diff --git a/documents/planning/tree-versioning-system.md b/documents/planning/tree-versioning-system.md
index cd1efc02..34aceae7 100644
--- a/documents/planning/tree-versioning-system.md
+++ b/documents/planning/tree-versioning-system.md
@@ -1,5 +1,25 @@
# Tree Versioning System: Production and WIP Trees
+> **📝 Rust status (2026-06-07).** Built — the doc's recommended **Option B (overlay
+> change-sets)** is what shipped: `tree.change_set` (native enum
+> `tree.change_set_status`) + `tree.tree_change`; there is **no `tree.tree_version`
+> table** (Option A was not taken); audit in `tree.curator_action`. **Stale below:**
+> the Scala/Slick/`*.scala.html` code, the `SERIAL`/`VARCHAR CHECK`/`TIMESTAMP`
+> schema (reality uses `BIGINT IDENTITY` + native enums + the existing temporal
+> `valid_from`/`valid_until` model, no version-id columns), the public
+> `/api/v1/tree/change-sets` + `/api/v1/curator/changes/*` endpoints (reality:
+> `/curator/change-sets/*` + `/curator/reviews/*` UI and `/manage/change-sets/*`
+> machine), and the granular `tree.version.*` permissions (reality: the single
+> **`Curator`** role).
+>
+> **Substantive evolution:** ambiguity handling moved from a file-based
+> `ambiguity_report_path` to the **`tree.wip_*` staging tables + the `/curator/reviews`
+> resolution flow** (REPARENT / MERGE_EXISTING / DEFER), enacted by the change-set
+> apply engine — see the user guide
+> [`../curator-guide-tree-versioning.md`](../curator-guide-tree-versioning.md). Treat
+> the API/permissions/`.scala` sections below as historical. Triage:
+> `design-doc-triage-report.md` §2.
+
## Executive Summary
This document outlines a system for managing multiple versions of the haplogroup tree: a **Production** (canonical, public-facing) version and a **WIP** (Work-In-Progress, staging) version. This enables large-scale tree merges (ISOGG, ytree.net, academic sources) to be ingested, reviewed, and validated before affecting production reporting.
diff --git a/documents/planning/variant-naming-authority.md b/documents/planning/variant-naming-authority.md
index 93a22c4e..78247c08 100644
--- a/documents/planning/variant-naming-authority.md
+++ b/documents/planning/variant-naming-authority.md
@@ -1,5 +1,16 @@
# DecodingUs Variant Naming Authority
+> **Implementation status (2026-06, Rust):** Core BUILT. Migration 0016 makes
+> `core.variant.canonical_name` nullable (NULL = unnamed, by coordinates) with a
+> partial unique index, and adds `core.du_variant_name_seq` + `core.next_du_name()`.
+> `du_db::naming` provides the naming queue, DU minting (old name → alias), the
+> lifecycle (`UNNAMED`→`PENDING_REVIEW`→`NAMED`), and a local same-coordinate
+> dedup check. Curator UI at `/curator/naming`; propagation feed at
+> `GET /api/v1/variants/export.gff` (GFF3). **Not yet:** a live external-source
+> (YBrowse/ISOGG/YFull) dedup lookup, and surfacing unnamed variants in the public
+> API/domain (the shared `du_domain::Variant.canonical_name` is still `String` —
+> a cross-repo change since Navigator shares it).
+
**Objective:** Establish DecodingUs as a recognized naming authority for Y-DNA variants, using the `DU` prefix.
## Naming Strategy
diff --git a/documents/planning/y-preprint-hallast-2026-incorporation.md b/documents/planning/y-preprint-hallast-2026-incorporation.md
new file mode 100644
index 00000000..1561c44e
--- /dev/null
+++ b/documents/planning/y-preprint-hallast-2026-incorporation.md
@@ -0,0 +1,123 @@
+# TODO: Incorporate Hallast et al. 2026 (population-scale Y assemblies)
+
+Created 2026-06-11. Repo: decodingus (AppView), branch `rust-rewrite-foundation`.
+
+**Source:** Hallast, Rhie, Loftus, et al. *"Population-scale Y chromosome assemblies
+reveal recurrent remodeling within constrained architectures."* bioRxiv
+2026.06.03.729890v1, posted 2026-06-06. CC-BY-NC-ND 4.0.
+DOI: https://doi.org/10.64898/2026.06.03.729890
+Local PDF: `~/Downloads/2026.06.03.729890v1.full.pdf`
+
+142 near-T2T de novo Y assemblies, 17 major haplogroups, dated phylogeny, full
+T2T-CHM13v2Y annotation set, and a three-way callable-mask comparison. It's a
+**resource paper**, and the resource lines up with three threads we already have
+open. This note scopes the two actionable threads + records the out-of-scope bulk.
+
+Related docs:
+- `documents/proposals/branch-age-estimation.md` (age framework — thread 2 lands here)
+- `documents/planning/y-tree-hs1-coordinate-enrichment.md` (the hs1/CHM13 native-coord issue)
+- memory: `yregions-ingest`, `y-tree-coords-recurrence`, `etl-cutover-verified`
+
+---
+
+## Thread 1 — Region-flag + callable-mask refinement (PRIORITY: high)
+
+Lands on the in-flight `yregions` ingest. Our ingest already pulls from the **exact
+bucket this paper is built on** (`human-pangenomics/T2T/CHM13/assemblies/annotation`,
+the `chm13v2.0Y_*_v1.bed` files). The paper's Methods reference **v2** versions of two
+of them and add features we don't yet load.
+
+- **Where:** `rust/crates/du-jobs/src/yregions.rs` (`SOURCES`, `classify_*`),
+ `rust/crates/du-db/src/genome_region.rs`, `du_db::variant::refresh_region_overlaps`.
+
+### Tasks
+- [x] **v2 BEDs wired** (commit `d39b314`). Both v2 files exist; bumped the two
+ `SOURCES` entries. v2 changes: inverted-repeats adds `IR2` (10→12 inverted_repeat
+ rows); amplicon coords refined. The `(region_type, name)` orphan risk is handled —
+ `run` now has full-snapshot sync (fetch-all-first → upsert → `prune_source_orphans`).
+ Live v2 reload pruned 9 orphaned v1 rows, 0 leftovers. P9/Rep1 NOT literally in v2
+ (still a separate hunt, below).
+- [x] **AZFc color-blocks — already loaded** (no action needed). The v2 amplicons BED
+ (`chm13v2.0Y_amplicons_v2.bed`, wired in d39b314) already carries the full Teitz
+ colorblock set: blue1-4, gray1-2, green1-3, red1-4, teal1-2, yellow1-2, plus
+ P1/P3/P5-AZFb/c blocks — all classified `ampliconic` (a flag type), so AZFc variants
+ are already low-confidence-for-placement. Paper Fig 2a confirms these ARE the AZFc
+ amplicon repeat blocks (b/g/r/t/y, Teitz ref 6).
+- [ ] **Add palindrome P9 — BLOCKED on coords.** Confirmed: v2 inverted-repeats has P1-P8
+ only, not P9. Paper main text (lines 519-523) gives only median length 15.8 kb + the
+ hg38 "Rep1" (12 kb) lineage; **exact CHM13v2.0 arm coordinates are in Suppl. Tables
+ 28-29** (not in the main PDF) — harvest from there or the T2T-chrY repo, then add as a
+ `palindromic` source (or wait for a v3 inverted-repeats BED that includes it). Low
+ urgency: one ~15.8 kb region.
+- [x] **Callable-mask justification — recorded** in `branch-age-estimation.md` (SNP rate
+ section) and `yregions.rs` module doc. Fig 5h-i numbers: phylogeny mask ~10.4 Mb =
+ XDR+AMPL+OTHER (excl. XTR/SAT/HET/DYZ19/CEN); XDR retained 8.111/8.341/7.437 Mb @ QV
+ 50.2/55.2/60.9; AMPL kept but QV ~46; SAT/HET/DYZ19 QV 35-44 or uncallable; no mask
+ calls centromere. de novo: 49/53 DNMs in Yq12, ~1 in euchromatin, 6/40 Yq12 SNVs are
+ gene conversion. → empirically validates the X-DEG denominator + HET_MASK.
+- [ ] After P9 (if added), re-run `decodingus-jobs run-once yregions` and confirm
+ `refresh_region_overlaps` re-flags cleanly (idempotent; full-snapshot sync since d39b314).
+
+### Validation note
+The paper empirically confirms variants in AMPL/SAT/CEN/DYZ17/DYZ19/HET are unreliable
+and recurrent — exactly the classes `classify_sequence_class` already folds to flagged
+types. This *validates* the existing `region_overlaps` design; cite it rather than
+re-architecting.
+
+---
+
+## Thread 2 — Branch-age calibration (PRIORITY: medium; follows thread 1)
+
+Lands in the age framework (`du_db::age`, `documents/proposals/branch-age-estimation.md`).
+Our model uses µ = 8.33×10⁻¹⁰ (Helgason). The paper provides an independent recent
+calibration + ready-made anchor nodes.
+
+### Tasks
+- [x] **Record the paper's clock rate as an alternative/cross-check** (commit `6544c6a`).
+ BEAST v1.10.4 strict molecular clock, **0.76×10⁻⁹ sub/site/yr (95% CI 0.67–0.86×10⁻⁹)**
+ — ~9% slower than our 0.833×10⁻⁹. Added as `du_db::age::HALLAST_RATE{,_LO,_HI}` next
+ to `SNP_RATE`; `recompute_combined_ages` keeps the Helgason default (no silent swap).
+ Both surfaced with provenance in `branch-age-estimation.md` (cross-check-clock table).
+- [x] **Seed `tree.genealogical_anchor` from dated nodes** (commit `6544c6a`, partial).
+ `scripts/seed-hallast-anchors.sql` — name-keyed, idempotent, `anchor_type='MODEL_DATED'`
+ with BEAST clock + HPD provenance in `details` (round-tripped via `date_ce` + the
+ consumer's `uncertainty_years`). Seeded + verified live (dev du-pg): **D1** TMRCA
+ 19,450 ybp (HPD 16,360–22,880) → GENEALOGICAL=COMBINED=19,450, `tmrca_ybp` gap-filled.
+ PENDING nodes (no clean clade-name map yet, recorded in the script): HG00512⋂HG02056
+ ~10,300 ybp (HPD 8,400–12,300, Suppl. Fig 61); HG00609-ref node 10,350 ybp (HPD
+ 8,540–12,330). Per-node TMRCAs aren't in extractable supplement text (Fig 1b / Suppl.
+ Fig 1 are figures) — harvest the rest from the Suppl. Tables workbook.
+ ⚠ Circularity caveat documented: these calibrate our SNP clock against another SNP
+ clock (intended, but flagged via `MODEL_DATED` so the term can be filtered).
+- [x] **Note the de novo per-generation rate** (commit `6544c6a`). Recorded in
+ `branch-age-estimation.md` (cross-check-clock subsection): CEPH-pedigree DNMs (R1b,
+ Porubsky et al. 2025) are the matching per-generation empirical anchor for the clock.
+
+---
+
+## Out of scope (record, don't build)
+
+The bulk of the paper is deep sequence biology a haplogroup/genealogy platform won't
+model: DAZ/RBMY/TSPY multicopy copy-number evolution, 5mC methylation profiles,
+centromere DYZ3 α-satellite HOR / CDR, Yq12 (DYZ1/DYZ2) structural genomics, gene
+conversion / G4 motifs, AZFc structural-haplotype cataloguing. The recurrence *principle*
+("repeat-mediated variants arise independently → distrust for placement") is already
+captured by our `defining_haplogroup_id` recurrence model + `region_overlaps` flag; the
+paper is supporting evidence, not new schema.
+
+---
+
+## Data sources (all public)
+
+- HPRC Data Release 2: https://humanpangenome.org/hprc-data-release-2/
+- T2T-chrY analysis repo (annotation/scaffolding code): https://github.com/arangrhie/T2T-chrY
+- Annotation BEDs (what `yregions` already reads):
+ `s3://human-pangenomics/T2T/CHM13/assemblies/annotation/`
+- GQC suspect-region BEDs: `s3://human-pangenomics/T2T/scratch/chrY/GQC/`
+- Samples: 1kGP Diversity Panel cell lines (132/144) + GIAB + CEPH1463 — public, except
+ two CEPH samples (NA12883/NA12884) which are dbGaP-restricted.
+
+## Suggested first step
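+*(Status: completed in commit `d39b314` except P9, which is still blocked — see Thread 1.)* Verify the two v2 BED URLs resolve, diff v2-vs-v1 parsed output, then wire the v2
+sources + AZFc color-blocks + P9 into `SOURCES`. That was the smallest contained change,
+and it touched code that was still uncommitted on this branch when this note was written.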
diff --git a/documents/planning/y-tree-cache-revalidation.md b/documents/planning/y-tree-cache-revalidation.md
new file mode 100644
index 00000000..05f809ec
--- /dev/null
+++ b/documents/planning/y-tree-cache-revalidation.md
@@ -0,0 +1,77 @@
+# Work Item: Tree-endpoint cache revalidation (ETag / version)
+
+**Status:** DONE (2026-06-12) — see "Implementation" below
+**Surface:** `du-web` — `GET /api/v1/y-tree/full` (and `mt-tree/full`)
+**Filed:** 2026-06-11 · **Origin:** Navigator (Edge) tree-cache staleness incident
+
+## Problem
+
+The Navigator caches the full tree JSON on disk and (historically) served it
+**cache-first forever**, with no way to know the AppView had a newer tree. This bit
+a real placement: a Navigator cached the `/y-tree/full` payload on 2026-06-10 when
+only **28.9%** of variants carried `hs1` (CHM13) coordinates; the AppView was later
+enriched to **91.7%**, but the Edge kept using the stale copy and **under-placed a
+low-coverage HiFi sample** (R-FGC29071 → K2b) because the deep R1b tips had no
+`hs1` coordinate to genotype. Only a manual refetch fixed it.
+
+The Edge has since added a **7-day TTL** (refetch weekly; fall back to the stale
+copy if the AppView is unreachable). That bounds staleness but is coarse: it
+re-downloads the full payload (~28 MB) on every expiry even when nothing changed,
+and a curated tree update inside the 7-day window isn't seen until the window rolls.
+
+## Ask
+
+Let the Edge **revalidate cheaply** instead of blindly re-downloading on a timer:
+
+1. Emit a stable **`ETag`** (and/or `Last-Modified`) on `GET /api/v1/y-tree/full`
+ and `GET /api/v1/mt-tree/full`, derived from the tree's content/version — e.g.
+ the active tree revision id + a hash of the serialized payload. The same input
+ that changes the payload must change the ETag.
+2. Honor **conditional GET**: `If-None-Match` (and `If-Modified-Since`) → return
+ **`304 Not Modified`** with no body when unchanged.
+3. (Optional, nice-to-have) expose the tree **version/revision** as a small JSON
+ field (e.g. `GET /api/v1/y-tree/version` → `{ "revision": …, "etag": … }`) so
+ clients can check version without fetching the tree at all.
+
+Since the tree is temporal (bitemporal `tree.haplogroup_relationship`, per-revision
+metadata), the ETag should key on the **current published revision** + the build's
+coordinate-enrichment state, so a `hs1`-coordinate backfill (the exact thing that
+caused the incident) bumps the ETag even if the topology is unchanged.
+
+## Acceptance criteria
+
+- `GET /api/v1/y-tree/full` returns an `ETag`; a subsequent request with a matching
+ `If-None-Match` returns `304` with an empty body.
+- Any change that alters the served payload (topology, variant set, **coordinate
+ enrichment**, naming) changes the `ETag`.
+- mt-tree parity.
+
+## Implementation (2026-06-12)
+
+A **persisted revision marker** (`tree.tree_revision`, migration 0024 — one global
+row) is the ETag source, **not** a request-time payload hash. It is bumped (+1)
+once by each tree-mutating operation: `change_set::apply` (in-txn), the
+coordinate/alias bulk enrichers (`variant::set_coordinates_bulk` / `set_aliases_bulk` — the
+hs1 backfill that caused the incident), `ybrowse::reconcile`, and the `tree-init`
+build. (`du_db::tree_revision::{current,bump}`.)
+
+The tree handlers now do a **cheap conditional GET**: read the revision marker,
+build the ETag, and short-circuit to **304** on a matching `If-None-Match`
+*before* the ~28 MB query/serialization. ETag = `"{y|mt}-{full|plain}-{root}-r{revision}"`
+(strong), so full-vs-plain, Y-vs-mt, and per-root payloads get distinct tokens; a
+global bump revalidates both trees (safe over-invalidation, never a false 304).
+Responses carry `ETag` + `Last-Modified` + `Cache-Control: no-cache`. Added
+`GET /api/v1/y-tree/version` + `/mt-tree/version` → `{revision, etag, updated_at}`.
+
+Verified over HTTP against the dev tree: full payload 28.67 MB → a matching
+`If-None-Match` returns **304 / 0 bytes**; a revision bump flips the old validator
+back to **200**. Tests: `du_db::tree_revision` integration, ETag-helper unit tests,
+and a `du-web` oneshot 200→304→bump→200 + version cycle.
+
+## Edge-side counterpart (already done, for reference)
+
+Navigator now: 7-day cache TTL (`NAVIGATOR_TREE_TTL_DAYS`), graceful fallback to the
+stale copy on fetch failure, and a **scoring cache keyed on the tree's content hash**
+(re-scores only when the tree content actually changes). With an ETag, the Edge would
+switch from "re-download weekly" to "revalidate weekly via `If-None-Match`, download
+only on `200`" — cutting the bandwidth and letting the TTL drop without cost.
diff --git a/documents/planning/y-tree-hs1-coordinate-enrichment.md b/documents/planning/y-tree-hs1-coordinate-enrichment.md
new file mode 100644
index 00000000..93b00ffb
--- /dev/null
+++ b/documents/planning/y-tree-hs1-coordinate-enrichment.md
@@ -0,0 +1,139 @@
+# Issue: Y-tree needs complete `hs1` (CHM13) coordinates for native CHM13 placement
+
+Created 2026-06-10. Repo: decodingus (AppView), branch `rust-rewrite-foundation`. Companion to
+the Navigator (DUNavigator) DecodingUs Y-tree provider — see that repo's
+`documents/design/DecodingUsTreeProvider.md` + `memory/decodingus-tree-provider.md`.
+
+## RESOLUTION (2026-06-10)
+
+Done in-place (no tree rebuild — the dev DB was already ISOGG + decoding-us, no FTDNA):
+
+1. **FTDNA descoped from code** — removed the Y-graft + mt-foundation paths from
+ `tree_init.rs` and the `reattach` path from `du_db::snp_graft` (FTDNA-only).
+2. **Root cause of the coordinate gap found + fixed:** the YBrowse mirror had been
+ ingested **without chain files**, so GRCh37/hs1 were empty on all 3.1M rows.
+ Re-ran `decodingus-jobs run-once ybrowse` with `YBROWSE_CHAIN_GRCH37`
+ (`hg38ToHg19`) + `YBROWSE_CHAIN_HS1` (`hg38ToHs1`), chains at
+ `~/Development/decodingus-data/chains/`. Reconcile enriched 2.98M variants.
+ **Live `/api/v1/y-tree/full` hs1 coverage: 29% → 88%** (GRCh37 28% → 88%).
+3. **ISOGG name-only resolution** — the residual coordless tips were ISOGG
+ name-*decoration* mismatches, not missing SNPs (ybrowse has the base SNP):
+ `.1`/`.2` = recurrence (same site, different branch), `^^` = stability marker.
+ Modeled per the universal-variant design via a new
+ `core.variant.defining_haplogroup_id` (migration 0023) — each recurrence is a
+ sibling row sharing name+coordinate, scoped to its branch. Ran
+ `tree-init --resolve-recurrence --apply` → 2,590 recurrence-ized + 2 folded;
+ empty-coordless tree variants 6,256 → 3,665 (residue: 3,122 reconcile-flags for
+ the curator, ~580 no-base/compound, ~118 genuinely not in ybrowse).
+4. **decoding-us multi-branch / back-mutation (forward/reverse) labeling** — 715
+ variants link the same SNP to >1 branch. `tree-init --label-recurrence --apply`
+ classifies each link by topological parsimony (Dollo: even defining-ancestors =
+ forward, odd = reverse/back-mutation) and writes the migration-0021 per-link
+ `ancestral_allele`/`derived_allele`. 630 labeled (381 homoplasy, 251
+ back-mutations). `scrub_recurrent_links` now **skips ASR-labeled variants**, so
+ genuine recurrence survives (scrub examined 715→85 after labeling). NOTE: purely
+ topological — no tip genotypes/character-state data exist for true ASR.
+
+Remaining: the 18 multi-link ISOGG-decorated variants (need one recurrence row per
+branch), the reconcile-flag curator queue, and genotype-based ASR if/when sample
+call data lands.
+
+## (original issue follows)
+
+## TL;DR
+
+The Navigator desktop app now places Y haplogroups against **our** DecodingUs tree (served by
+`GET /api/v1/y-tree/full`, added in commit cd97864) instead of FTDNA. It uses each variant's
+**native build coordinate** — for a CHM13 alignment, the `hs1` coordinate — so placement needs
+**no liftover**. That's the intended architecture: the AppView owns multi-build coordinates;
+Navigator stays liftover-free.
+
+**The gap:** `hs1` coordinates today cover only the **decoding-us backbone**, not the
+FTDNA-grafted tips. So a CHM13 sample places correctly down the backbone but **stops at K2b
+instead of reaching its terminal (R-FGC29071)**. The AppView needs to provide `hs1` coordinates
+for **every** tree variant.
+
+## Evidence (live, GFX0457637 CHM13 HiFi BAM)
+
+Validated against a locally-running AppView (`/api/v1/y-tree/full`), Navigator test
+`validate_gfx_decodingus_y`:
+
+| Path | Coords used | SNPs matched | Placement |
+|------|-------------|-------------:|-----------|
+| DecodingUs **native hs1** (no liftover) | `hs1` | 101 / 119 | **K2b** (backbone only) |
+| FTDNA GRCh38 + liftover (reference) | GRCh38 | 1592 / 1919 | **R-FGC29071** (correct terminal) |
+
+Coordinate coverage across the **79,602** tree variant-links in `/api/v1/y-tree/full`:
+
+| Build | variant-links with this coordinate | % |
+|-------|-----------------------------------:|---|
+| GRCh38 | 70,294 | 88% |
+| GRCh37 | 22,300 | 28% |
+| **hs1 (CHM13)** | **22,988** | **29%** |
+
+So ~47k tree variants have a GRCh38 coordinate but **no hs1** — and those are the deeper
+(FTDNA-grafted) tips. M207 (R root) *does* have hs1, but the R-subclade tips below K2b largely
+don't, so descent halts. (Across all 3M `core.variant` rows, GRCh38-with-derived = ~2.99M,
+hs1-with-derived = ~72.9k — `hs1` is sparse globally too.)
+
+## What's needed (the ask)
+
+Populate `hs1` (and ideally `GRCh37`) coordinates for **all** Y-tree variants, by lifting their
+GRCh38 coordinate to `hs1`. Two viable shapes (the user is open to either):
+
+1. **Ingest / enrichment phase (persistent).** A `decodingus-tree-init` / enrichment step that,
+ for every `core.variant` with a GRCh38 coordinate but no `hs1`, lifts GRCh38→hs1 and writes
+ the `hs1` entry into `core.variant.coordinates`. The AppView already has GRCh38→hs1 liftover
+ infrastructure — see `rust/crates/du-jobs/src/ybrowse.rs` ("GRCh38 -> hs1 (T2T-CHM13) chain
+ file") and `rust/crates/du-migrate/src/bin/tree_init.rs` (`prod_build` maps `hs1`). Reuse the
+ same chain. **Scale/constraint:** the dev Postgres container is RAM-limited (1 GB) and was
+ OOM-killed by a single 3M-row `UPDATE` (migration 0021) — **batch** the enrichment (commit per
+ chunk) or scope it to tree-linked variants (~47k missing hs1) rather than all 3M.
+
+2. **On the fly (in the API).** In the `/api/v1/y-tree/full` handler, for any variant lacking an
+ `hs1` coordinate, lift its GRCh38 coordinate to `hs1` at response time (chain loaded at
+ startup). No DB mutation; pairs naturally with the existing `du-jobs` liftover. Heavier per
+ request, but the tree response is already cached on the Navigator side.
+
+Either way the goal is identical: every Y-tree variant carries a usable `hs1` coordinate (contig
+`chrY`, position, ancestral, derived) so Navigator's native-CHM13 path reaches terminals.
+
+## Interaction with the FTDNA-merge descope decision
+
+The user is separately deciding to **descope the FTDNA merge and build the tree from only the
+ISOGG + decoding-us prod trees**. That decision directly bears on this issue:
+
+- The `hs1` gap is concentrated in the **FTDNA-grafted** tips (decoding-us variants already carry
+ multi-build coords incl. `hs1`). If FTDNA tips are dropped, the remaining tree is
+ ISOGG + decoding-us — so check whether **ISOGG** coordinates include `hs1`/CHM13 (if ISOGG is
+ GRCh38-only, those nodes still need the GRCh38→hs1 lift).
+- Net: the enrichment lift is still likely needed for ISOGG-sourced nodes, but the volume and the
+ "which tips exist at all" both change. **Sequence this issue after (or alongside) the
+ FTDNA-descope decision** so we don't lift coordinates for variants we're about to drop.
+
+## Verification
+
+Once `hs1` is complete for the Y tree:
+- `GET /api/v1/y-tree/full` → most/all variants carry an `hs1` coordinate with `derived`.
+- Navigator `validate_gfx_decodingus_y` (in DUNavigator, against the AppView) reaches
+ **R-FGC29071** via the native-hs1 path (no liftover), matching the FTDNA reference result.
+
+## Local dev-DB state note (for whoever picks this up)
+
+To get the AppView running locally on 2026-06-10, migration **0021_ancestral_state** was applied
+**manually** to the dev DB (`du-pg` container,
+`postgres://postgres:dev@192.168.64.2:5432/decodingus`): its 3M-row `UPDATE` OOM-killed the 1 GB container, so the relabel was run in
+**100k-id batches** (committing each), then the part-2 `ALTER` ran, then the row was recorded in
+`_sqlx_migrations` (version 21, checksum `f78640156ad4…`) so `du-web` startup skips it. The DB is
+now at migration 21, clean. If you `reset` the container you'll need to re-apply 0021 the same
+batched way (or bump the container's memory).
+
+## Secondary (Navigator-side) observation — not blocking
+
+Navigator also has a GRCh38-coords + liftover fallback path for the DecodingUs tree. On the DU
+tree it currently under-matches (the GFX sample's M207 came back as a no-call; carried SNPs
+scattered → shallow placement) even though the DU GRCh38 coordinates match FTDNA's
+(55,293/55,354 same position). This looks like a back-map collision when many DU variants share
+or recur at the same lifted CHM13 position (the tree has recurrent-SNP/homoplasy structure — cf.
+migration 0021's per-branch allele columns). It's not on the critical path (native-hs1 is the
+intended route), but worth a look on the Navigator side if the GRCh38+lift fallback is kept.
diff --git a/documents/proposals/Messaging_and_Feed_System.md b/documents/proposals/Messaging_and_Feed_System.md
index f5406c13..4d89508f 100644
--- a/documents/proposals/Messaging_and_Feed_System.md
+++ b/documents/proposals/Messaging_and_Feed_System.md
@@ -1,5 +1,13 @@
# Messaging & Feed System Design
+> **📝 Status (2026-06-07): forward (Bucket B).** Schema present
+> (`social.{user_block, conversation, message, feed_post}`, mig 0009); **no logic**.
+> Refresh Slick→Rust when the social layer is scheduled. **Reconcile with the no-PII
+> direction:** DMs must NOT be central plaintext (`social.message`) — route them over
+> the **D1 encrypted relay** (or AT-Proto records); the public feed (AT-Proto
+> `feed.post` + AppView index) is fine. Reconcile threads with D4 assertions.
+> Triage: `triage-report.md` §5.
+
## 1. Overview
This document outlines the design for a comprehensive messaging and social feed system for DecodingUs. The goal is to facilitate communication between:
1. **Java Edge Applications (PDS Managers):** Automated or semi-automated agents running on user hardware (The Navigator Workbench) that need to coordinate with other nodes or contact researchers.
diff --git a/documents/proposals/Patronage_Donation_System.md b/documents/proposals/Patronage_Donation_System.md
index fc160da7..70329b53 100644
--- a/documents/proposals/Patronage_Donation_System.md
+++ b/documents/proposals/Patronage_Donation_System.md
@@ -1,5 +1,11 @@
# Patronage Donation System Proposal
+> **📝 Status (2026-06-07): deferred.** Not in scope for the current rewrite, but
+> **deferred, not dropped** — patronage/billing will likely return to fund
+> infrastructure once active users cross ~a few hundred (the `/faq` already names it
+> as the sustainability path). Refresh the Scala/Play/Stripe specifics to the Rust
+> stack when revived. Triage: `triage-report.md` §7.
+
## 1. Overview
DecodingUs is committed to operating as a free-to-use community service for genetic genealogy and population research. However, as the platform scales, the operational costs associated with hardware, hosting, and maintaining the DecodingUs Atmosphere will grow. To ensure long-term sustainability without compromising user privacy or monetizing user data, we propose implementing a Patronage Donation System.
diff --git a/documents/proposals/Reputation_System_Implementation.md b/documents/proposals/Reputation_System_Implementation.md
index b5da33e5..2cd17318 100644
--- a/documents/proposals/Reputation_System_Implementation.md
+++ b/documents/proposals/Reputation_System_Implementation.md
@@ -1,5 +1,11 @@
# Reputation System Implementation Plan
+> **📝 Status (2026-06-07): forward (Bucket B).** Schema present
+> (`social.{reputation_event_type, reputation_event, user_reputation_score}`,
+> mig 0009); **no logic**. The public `/reputation` page already describes it.
+> Refresh Slick→Rust when the social layer is scheduled (lower priority — depends on
+> social being live). Triage: `triage-report.md` §6.
+
## 1. Overview
The Reputation System is a core mechanism to ensure quality interactions within the DecodingUs social features (Messaging, Feed, Lab Discovery). It rewards positive contributions and limits spam/abuse by gating features based on a user's `UserReputationScore`.
diff --git a/documents/proposals/branch-age-estimation.md b/documents/proposals/branch-age-estimation.md
index bf427843..54db073c 100644
--- a/documents/proposals/branch-age-estimation.md
+++ b/documents/proposals/branch-age-estimation.md
@@ -1,5 +1,16 @@
# Branch Age Estimation System
+> **✅ Realized in the Rust build (status 2026-06-07).** The combined branch-age
+> framework is implemented: `tree.haplogroup_ancestral_str` + the combined age
+> (mig 0013/0014), `tree.genealogical_anchor`, `genomics.str_mutation_rate`,
+> `genomics.biosample_callable_loci`, `du_db::age` (`combine` +
+> `recompute_combined_ages`), `du_db::ystr`, the `branch-age-recompute` job, and
+> `GET /api/v1/haplogroups/{name}/age`. Caveats: the Rust combine is
+> **inverse-variance** (a simplification of the full PDF multiplication below) and
+> genealogical-anchor wiring may be partial. **Kept as the scientific methodology
+> reference** (mutation rates, multi-step STR frequencies, precision tables, the
+> McDonald port) for future refinement. Triage: `triage-report.md` §3.
+
**Reference:** McDonald, I. (2021). "Improved Models of Coalescence Ages of Y-DNA Haplogroups." *Genes*, 12(6), 862. https://doi.org/10.3390/genes12060862
**Status:** Backlog
@@ -18,7 +29,7 @@ This proposal integrates with other planning documents:
|----------|-------------|
| `../planning/haplogroup-discovery-system.md` | **Primary integration point.** SNP counts come from `tree.haplogroup_variant`. Private variants from `tree.biosample_private_variant` provide per-sample data for individual TMRCA calculations. Age recalculation should trigger when branches are promoted. |
| `../planning/multi-test-type-roadmap.md` | **Test type coverage data.** Callable loci vary by test type (WGS ~3Gbp, BigY-700 ~15Mbp, Chip ~2000 SNPs). Uses `test_type_definition` table for platform characteristics. |
-| `../planning/appview-pds-backfeed-system.md` | **PDS data flow.** STR profiles and private SNP counts flow from user PDS via firehose. Age estimates are NOT backfed (computed results, not user data). |
+| Federation ingest (Jetstream → `fed.*`) | **PDS data flow.** STR profiles and private SNP counts flow from the user PDS via the Jetstream summary mirror. Age estimates are AppView-computed (not backfed). |
| `group-project-system.md` | **Group TMRCA.** Group projects display TMRCA estimates in `projectTreeView`. Project-level modal haplotypes feed into STR-based age estimation. |
**Schema Note:** All haplogroup-related tables reside in the `tree` schema. Branch age fields (`formed_ybp`, `tmrca_ybp`, etc.) were added to `tree.haplogroup` in evolution 48.
@@ -79,10 +90,29 @@ Must account for:
- **Parallel mutations** (independent lines mutate to same value)
- **Multi-step mutations** (+2, -2, +3, etc.)
-**Multi-step frequencies:**
-- ω±1 ≈ 0.962 (single-step)
+**Multi-step frequencies** (McDonald §2.5.3, from ref [8]):
+- ω±1 ≈ 0.962 (single-step; adjusted to 0.96217 so Σω±n = 1)
- ω±2 ≈ 0.032 (two-step)
- ω±3 ≈ 0.004 (three-step)
+- ω±≥4: each further repeat is rarer by a factor of √10 (ω±(n+1) = ω±n ÷ √10 for n ≥ 3)
+
+**Implemented** (`du_db::ystr`): `P(g|m)` is McDonald's **Table 1**, embedded verbatim
+over its published range (g,m ≤ 10) and extended by the signed-step convolution of
+the ω above beyond it (deep-time, low-weight terms only — the convolution is the
+*exact* all-orders sum, so it differs from the f_r-truncated Table 1 by up to ~0.1 at
+a few cells; the embedded table is authoritative in-range). A marker's age term is
+`P(t|g) = Σ_m P(t|m)·P(g|m)` — a mixture over the hidden mutation count `m` of Poisson
+age PDFs (`du_db::pdf::Pdf::mixture`), rate per generation → years via
+`GENERATION_YEARS = 33`. STR ages **propagate up the tree** (`ystr::propagate_str`,
+the §2.2 SNP strategy): ancestral motifs are reconstructed for internal nodes
+(§2.5.2 up-pass modal-of-sub-clades + down-pass parent fill), then a node's TMRCA is
+the product over children of (child TMRCA ⊛ the parent→child STR branch time) and
+over direct tester tips — so internal nodes get ages from their descendants and a
+parent stays older than its children. (`compute_str_age`'s per-clade star pooling is
+retained as a utility but no longer drives the written ages.) Per-marker
+`omega_plus`/`omega_minus`/`multi_step_rate`
+(`genomics.str_mutation_rate`) build a marker-specific `P(g|m)` table when they depart
+from the global symmetric single-step-dominated model.
### Confidence Intervals
@@ -97,12 +127,86 @@ Must account for:
### 1. Reference Data (System-Level)
-#### SNP Mutation Rate Table
-| Region | Rate (SNPs/bp/yr) | 95% CI | Source |
-|--------|-------------------|--------|--------|
-| MSY Combined | 8.33 × 10⁻¹⁰ | 7.57–9.17 × 10⁻¹⁰ | Helgason 2015 |
-| X-degenerate + Ampliconic | 8.71 × 10⁻¹⁰ | 8.03–9.43 × 10⁻¹⁰ | Helgason 2015 |
-| Palindromic | 7.37 × 10⁻¹⁰ | 6.41–8.48 × 10⁻¹⁰ | Helgason 2015 |
+#### SNP Mutation Rate
+
+**The method uses a *single* combined rate** (McDonald 2021 §2.2.1, Eq 2–3; §3: "the
+combined Y-SNP mutation rate of Helgason et al. is used"): `µ_SNP = 8.33 × 10⁻¹⁰`
+SNPs/bp/yr (95% CI 7.57–9.17 × 10⁻¹⁰). It is **not** applied per-region.
+
+The per-region figures below are **evidence that the rate is ~constant across the
+MSY** (McDonald Appendix A.4, from Helgason 2015) — *not* a directive to apply
+different rates to different regions. The paper's conclusion: "the mutation rate is
+constant when sufficiently large regions of the MSY are considered."
+
+| Region | Rate (SNPs/bp/yr) | 95% CI | Notes |
+|--------|-------------------|--------|-------|
+| MSY combined (used) | 8.33 × 10⁻¹⁰ | 7.57–9.17 × 10⁻¹⁰ | The rate the model applies |
+| X-transposed + X-degenerate + ampliconic (15.2 Mbp) | 8.71 × 10⁻¹⁰ | 8.03–9.43 × 10⁻¹⁰ | ~constant evidence |
+| Palindromic (6.1 Mbp) | 7.37 × 10⁻¹⁰ | 6.41–8.48 × 10⁻¹⁰ | slightly lower (gene conversion), P=0.04 |
+
+**Region handling is by self-consistent *masking*, not per-region rates** (McDonald
+Appendix A.2): "As highly recurrent base pairs are excised from mutation-rate
+estimations, they should also be self-consistently removed from TMRCA calculations
+and excised from the subset of base pairs b̄." A.3 names the regions to mask
+(centromere, DYZ19; palindromic arms depending on calling). Ampliconic sequence is
+**kept** (same rate as X-degenerate). The implication for `b`: drop only the
+recurrent/heterochromatic regions — *not* all of ampliconic/palindromic — and ensure
+the SNP count `m` is excised over the same regions (`m ⊆ b`, McDonald §2.2.3).
+
+**Empirical validation (Hallast et al. 2026, 142 population-scale Y assemblies).**
+This masking choice is confirmed independently by the paper's three-way callable-mask
+comparison (their Fig 5h-i):
+- Their phylogeny ran on a **~10.4 Mb mask (10,400,778 callable positions, 25,426
+ polymorphic sites) = X-degenerate + ampliconic + "other"**, *excluding* X-transposed,
+ satellite, heterochromatin, DYZ19, and centromere — the same split as our denominator
+ (`y_xdegen + y_ampliconic + y_palindromic`, with `HET_MASK` dropping heterochromatic
+ SNPs).
+- **X-degenerate is the agreed, high-QV core** across all three masks (GRCh37 / T2T /
+ pangenome): retained bp 8.111 / 8.341 / 7.437 Mb, mean QV 50.2 / 55.2 / 60.9.
+- **Ampliconic is kept but lower quality** (QV 45.7 / 46.2 / 61.5) — consistent with
+ keeping it in `b` (same mutation rate) while flagging it low-confidence-for-*placement*.
+- **Satellite / heterochromatin / DYZ19 are low-QV (35–44) or uncallable; no mask calls
+ centromeric sequence** — validating their exclusion from the age count.
+- The de novo data underline this: 49/53 (92.5%) pedigree DNMs fall in Yq12, only ~1 SNV
+ in euchromatin, and 6/40 Yq12 SNVs trace to gene conversion (recurrent), not de novo —
+ i.e. the masked compartments are exactly where mutations are unreliable/recurrent.
+
+**Cross-check clock (Hallast et al. 2026).** The same paper provides an *independent*
+recent calibration we record but **do not** substitute for Helgason:
+
+| Clock | Rate (sub/site/yr) | 95% CI | Role |
+|-------|--------------------|--------|------|
+| Helgason 2015 (used) | 0.833 × 10⁻⁹ | 0.757–0.917 × 10⁻⁹ | the rate the model applies |
+| Hallast 2026 BEAST (cross-check) | 0.76 × 10⁻⁹ | 0.67–0.86 × 10⁻⁹ | sanity bound only |
+
+Method: BEAST v1.10.4 strict molecular clock, RAxML GTR+Γ start tree, constant-size
+coalescent, 150 M MCMC (10% burn-in), TreeAnnotator MCC tree — run on the ~10.4 Mb
+X-degenerate-style mask above. It is **~9% slower** than Helgason, so adopting it would
+push every TMRCA ~9% older; the two CIs overlap, so it functions as a consistency check,
+not a correction. Constants `HALLAST_RATE{,_LO,_HI}` live alongside `SNP_RATE` in
+`du_db::age`; the default stays Helgason (do not silently swap — surface both with
+provenance). The CEPH-pedigree de-novo rate (R1b lineages, Porubsky et al. 2025) is the
+matching *per-generation* empirical anchor for the same clock.
+
+**Calibration anchors (dated nodes).** Hallast's time-calibrated phylogeny (their Suppl.
+Fig. 1, ISOGG v15.73 labels; 95% HPD from BEAST) yields ready-made `tree.genealogical_anchor`
+rows — model-dated TMRCAs, *not* radiocarbon, so they carry `anchor_type = MODEL_DATED`
+and full provenance in `details` (source, clock, HPD) so a curator can down-weight or
+exclude them. Seeded by `scripts/seed-hallast-anchors.sql` (name-keyed, idempotent, run
+after the tree load). Currently mappable to our clade names:
+
+| Node | TMRCA (ybp) | 95% HPD | Source |
+|------|-------------|---------|--------|
+| D1 | 19,450 | 16,360–22,880 | Hallast 2026 Fig 1b / Suppl. Fig 1 |
+| HG00512 ⋂ HG02056 | ~10,300 | 8,400–12,300 | Hallast 2026 Suppl. Fig 61 |
+
+> **Circularity caveat:** these are themselves molecular-clock estimates, so feeding them
+> into the inverse-variance `COMBINED` term partly calibrates our SNP clock against another
+> SNP clock. That is intended (a tight external constraint on deep nodes), but it is *not*
+> independent evidence the way an aDNA C14 date is — hence `MODEL_DATED` and the recorded
+> provenance, so the term can be filtered. Most of the dated phylogeny lives in figures
+> (Suppl. Fig. 1 / Fig 1b) and Suppl. Tables, not extractable text; harvest more nodes from
+> the tables workbook when mapping them to our haplogroup names.
#### STR Mutation Rate Database
Per-marker mutation rates needed for ~700+ Y-STR markers:
@@ -201,7 +305,7 @@ case class StrMarkerValue(
### Edge Computing Model
-**Critical Architecture Principle** (from `appview-pds-backfeed-system.md`): Raw genomic data (BAM/CRAM/VCF) **never** flows to DecodingUs. All raw data analysis happens locally in the Navigator Workbench.
+**Critical Architecture Principle**: Raw genomic data (BAM/CRAM/VCF) **never** flows to DecodingUs. All raw data analysis happens locally in the Navigator Workbench.
```
┌─────────────────────────────────────────────────────────────────────────┐
@@ -495,13 +599,25 @@ Group projects compute modal STR haplotypes (`projectModal`). These can feed int
**Goal:** Add Y-STR data to improve precision.
**Tasks:**
-1. [ ] Create `genomics.str_mutation_rate` table
-2. [ ] Import mutation rates from Ballantyne/Willems studies
-3. [ ] Create `tree.haplogroup_ancestral_str` table
-4. [ ] Implement ancestral STR motif calculation (modal values)
-5. [ ] Implement P(g|m) mapping with multi-step mutations
-6. [ ] Create `StrAgeService` for STR-based age calculation
-7. [ ] Integrate STR PDFs into combined calculation
+1. [x] Create `genomics.str_mutation_rate` table (migration `0014_str_age`)
+2. [x] Import mutation rates from Ballantyne/Willems studies —
+ `scripts/seed-str-mutation-rates.sql` seeds 137 markers: Willems 2016 (1000G
+ MUTEA, 116 markers + 95% CIs) primary, YHRD combined rates gap-filling 11
+ core markers Willems' short-read set misses (DYS393, DYS390, DYS449, …).
+ Only DYS447 among common single-copy markers still falls back to
+ `DEFAULT_STR_RATE = 0.0025`. (Ballantyne is McDonald's ref [8] — its
+ single:multi-step 25.23:1 already sets the global ω.)
+3. [x] Create `tree.haplogroup_ancestral_str` table (migrations `0013`/`0014`)
+4. [x] Implement ancestral STR motif calculation (modal values) — `ystr::compute_modal`
+5. [x] Implement P(g|m) mapping with multi-step mutations — `ystr` (Table 1 + convolution)
+6. [x] Create `StrAgeService` for STR-based age calculation — `ystr::compute_str_age`
+ (multi-step PDF model; supersedes the legacy linear ΣΔ/Σµ estimator)
+7. [x] Integrate STR PDFs into combined calculation — `COMBINED` (`du_db::age`) is
+ the direct PDF product (Eq 1) of the SNP TMRCA PDF (propagation), the STR
+ TMRCA PDF (`ystr::str_tmrca_pdfs`), and the genealogical anchor PDF, all on
+ the shared TREE grid (50 yr / 350 ky) — preserving non-Gaussian shape instead
+ of inverse-variance-averaging medians. Disjoint terms fall back to the
+ Gaussian combine; a stored STR_VARIANCE row with no fresh PDF still contributes.
**Data needed:**
- Y-STR profiles from PDS (ensure Atmosphere capture)
diff --git a/documents/proposals/denovo-tree-ingestion.md b/documents/proposals/denovo-tree-ingestion.md
new file mode 100644
index 00000000..7beb1ddf
--- /dev/null
+++ b/documents/proposals/denovo-tree-ingestion.md
@@ -0,0 +1,228 @@
+# De-novo Y / mtDNA tree ingestion
+
+**Status:** proposed (2026-06-17). Supersedes the ISOGG-import tree foundation.
+
+## Why
+
+The tree foundation is moving from **importing ISOGG** (a curated external
+nomenclature) to **ingesting a tree we build ourselves** from genotypes. The
+de-novo pipeline at `~/Genomics/ytree` joint-calls chrY + chrM across four
+cohorts on CHM13v2 (hs1), builds an IQ-TREE ML tree, and runs marginal
+ancestral-state reconstruction to derive the defining SNPs of every branch —
+i.e. it produces an ISOGG-shaped tree (nodes + per-branch SNPs) but grounded in
+real data on a modern reference.
+
+This retires a whole class of problems at once: the ISOGG `~`-fold corruption,
+the cross-source SNP-anchor graft, the YCC-longhand rename, the legacy GRCh37
+coordinate frame, and the absence of an mt tree. The de-novo tree **is** the
+foundation; nothing is grafted onto it. The clearing front-end is the
+`du_db::haplogroup::reset_tree` / `tree-init --reset` work already in hand.
+
+The current artifacts are a **1,742 chrY (male) + 3,344 chrM** workflow-correctness
+batch. A more complete tree (adding the remaining HGDP + SGDP samples) follows;
+ingestion must therefore be **re-runnable as a clean replace**, not a merge.
+
+## Decisions (locked)
+
+1. **Seam = pipeline-side JSON export + Rust loader.** A new `68_export_ingest.py`
+ in `~/Genomics/ytree/bin` emits one normalized JSON per chromosome; a Rust
+ loader consumes it. Same architecture as the ISOGG `isogg_to_json.rb → json →
+ tree-init` seam — it decouples the phylo pipeline from our DB schema so neither
+ side breaks the other. (The historical bug was the import *logic*, not this
+ seam.)
+2. **Node naming = ISOGG / PhyloTree label primary.** Display name is the
+ compare-mapped label (`R-M269`, `B-Z43718`) where one exists; synthesized from
+ the node's strongest defining SNP otherwise. The de-novo `NodeN` id and the
+ ISOGG-clade mapping are retained in provenance.
+3. **Initial scope = topology + defining SNPs first** (Y, then mt). Sample-leaf
+ placement and the curation surface are follow-ups.
+4. **Coordinate frame = CHM13v2 / hs1.** Variants link to `core.variant` by hs1
+ coordinate, reusing the YBrowse-loaded catalog so known SNPs inherit their
+ names; novel de-novo SNPs are created.
+5. **Greenfield replace.** Each ingest clears `tree.*` (`reset_tree`) and loads
+ the de-novo tree as the sole foundation. No ISOGG/decoding-us/FTDNA layers.
+
+## Source artifacts (per chromosome)
+
+All under `~/Genomics/ytree/`. Y shown; mt mirrors it (`chrM.asr.*`, `mt_*`).
+
+| File | Role |
+|---|---|
+| `results/chrY.asr.treefile` | Newick; internal nodes labelled `NodeN/<support>`, tips = sample IDs, branch lengths. **The topology.** |
+| `results/chrY.asr.branch_transitions.tsv` | per branch `parent→child`: `n_mut`, `n_reversion`, `chrY:pos anc>der` list |
+| `results/chrY.asr.snp_assignments.tsv` | per (branch, SNP): `chrom,pos,ref,alt,ancestral,derived,parent,child,to_alt,reversion,anc_chimp,polarity` |
+| `compare/internal_node_labels.tsv` | `our_node(NodeN) → isogg, label, markers_matched, markers_expected` — the **display name** + mapping |
+| `compare/tip_haplogroup_calls.tsv` | `sample → terminal_isogg, terminal_label, balance, path_derived, path_ancestral, path` — leaf metadata |
+| `compare/conflict_triage.tsv` | `isogg,label,n_tips,magnitude,members_away,foreign_in,home_node` — curation conflicts vs ISOGG |
+| `results/chrY.callable_mask.chm13v2.bed` | Poznik-style call mask (region reliability) |
+| `manifests/samples.tsv` | `sample, cohort, cram_path, sex` — tip → biosample provenance |
+
+We ingest the **publication tree** (`results/chrY.asr.publication.treefile`): the
+full ML tree collapsed under the builder's **keep-set rule** and QC-failed tips
+pruned (HG02772). A node survives iff **(UFBoot ≥ 95 AND it carries ≥1 defining
+mutation) OR it is a primary best-clade haplogroup placement** (`compare/chrY.keepset.tsv`).
+The keep-set is essential: rapid Y expansions give real, named macro-clades (R, R1,
+R1b — all UFBoot ≈ 80) only moderate bootstrap, so a pure UFBoot ≥ 95 rule **gutted
+the named backbone** (R-CTS4466 dangled directly under IJK). The keep-set preserves
+named clades even at moderate support while still collapsing anonymous weak nodes; it
+also dedupes recurrent placements to one node per haplogroup (e.g. spurious DF13
+`Node494` collapses; real DF13 `Node423` is kept). The **`n_mut ≥ 1` clause** (from
+`*.asr.branch_transitions.tsv`) drops zero-mutation bifurcations that UFBoot
+over-supported — the mtDNA "0 defining variant" placeholder nodes (`Node82`, `Node110`,
+…); their named children reattach to the parent as polytomies, so no tips are lost and
+every named clade survives. Because the exporter derives survival from the publication
+treefile itself, this refinement needed **no loader/exporter change** — only a
+re-export + reload once the builder regenerated the treefiles. The exporter reads the surviving `NodeN`
+set **directly from the publication treefile** (all artifacts share the full-tree
+`NodeN` namespace, so survivors keep their ids; SNPs/labels/tips still join by
+`NodeN`) rather than re-deriving the collapse. For chrY: **1,203 internal nodes**
+(from 1,740) + 1,741 tips; R-CTS4466 nests at depth 21.
+
+**Naming** (owned import-side; the keep-set/labels provide the clade identity, we
+format the display name). Per node, in order: **backbone/macro clade verbatim** if
+the clade (keep-set / `isogg`) has no lowercase — `A`, `IJK`, `BT`, `CT`, `NO`,
+`K2` (the comparison's `<letter>-<SNP>` label mangles these: `IJK→I-M2696`);
+else the `<letter>-<SNP>` `label` from `internal_node_labels` for ISOGG subclade
+longhands (`R1b1a…→R-L389`); else the node's own catalog-matched defining SNP
+(resolves `Node423→DF13`, `Node341→P312`); else a synthetic `chrY:<pos><anc>><der>`
+coordinate name (≈106 genuinely novel de-novo clades with no known SNP); the root
+keeps its `NodeN`. Result: 1,097/1,204 (91%) carry proper haplogroup/SNP names.
+
+**Collapsed-branch SNPs (the one subtle point).** Collapsing 682 weak nodes
+orphans 8,919 defining SNPs whose true MRCA node no longer exists. Policy
+(decided): a collapsed branch's SNPs **lift to the nearest surviving ancestor** as
+a tagged *unresolved* block in that node's provenance — **not** strict defining
+links (the ancestor's other children don't carry them; exact placement in the
+subtree is unresolved). Surviving nodes keep only their **own** branch SNPs as
+defining links. This preserves every SNP exactly once (85,955 defining + 8,919
+unresolved = 94,874) with no link bloat and no invented homoplasy.
+
+## The contract — normalized ingest JSON
+
+`68_export_ingest.py` joins the treefile + the two result TSVs + the three
+compare TSVs + the manifest into one file per chromosome:
+
+```jsonc
+{
+ "chromosome": "chrY", // | "chrM"
+ "haplogroupType": "Y_DNA", // | "MT_DNA"
+ "build": "chm13v2.0",
+ "source": "decodingus-denovo",
+ "root": "Node1408",
+ "run": { "tips": 1742, "model": "GTR+ASC", "rooting": "polarize",
+ "ufboot": 1000, "date": "2026-06-17" },
+ "nodes": [
+ {
+ "id": "Node1409", // stable de-novo NodeN id
+ "parent": "Node1408", // null at root
+ "support": 100, // UFBoot → confidence_level
+ "branchLength": 0.007374,
+ "label": "B-Z43718", // mapped display name (null ⇒ synthesize)
+ "isogg": "B3", // mapped clade (provenance), nullable
+ "markersMatched": 502, "markersExpected": 521,
+ "nMut": 1924, "nReversion": 205,
+ "definingVariants": [ // this node's OWN branch → haplogroup_variant links
+ { "chrom":"chrY","pos":2472503,"ref":"A","alt":"T",
+ "ancestral":"A","derived":"T","reversion":false,"polarity":"forward" }
+ ],
+ "unresolvedVariants": [ // collapsed sub-branch SNPs → provenance block, NOT links
+ { "chrom":"chrY","pos":2480028,"ref":"C","alt":"T",
+ "ancestral":"C","derived":"T","reversion":false,"polarity":"forward" }
+ ]
+ }
+ ],
+ "tips": [ // phase 3 (leaf placement)
+ { "sample":"Ale22","parentNode":"Node1640","cohort":"PRJEB9586","sex":"male",
+ "terminalLabel":"J-Y27554","terminalIsogg":"J2a1a2b1~",
+ "balance":388,"pathDerived":391,"pathAncestral":3 }
+ ],
+ "conflicts": [
+ { "isogg":"A1b","label":"A-P108","nTips":1733,"magnitude":1,
+ "homeNode":"Node1404","foreignIn":1,"membersAway":0 }
+ ]
+}
+```
+
+Notes for the exporter:
+- A tip's **placement** is its parent `NodeN` in the Newick (not the ISOGG
+ `path` — that is naming/validation metadata only).
+- `definingVariants` come from `branch_transitions` joined, per `parent→child` branch, with
+  `snp_assignments` (the latter supplies `reversion`/`polarity`/`anc_chimp`).
+- Emit the full-tree node set; carry `support` so the AppView can collapse by
+ UFBoot for display rather than us discarding low-support structure.
+
+## Rust loader
+
+`decodingus-tree-init --denovo-y --apply` (and `--denovo-mt`), a new
+foundation path beside the existing `--isogg`:
+
+1. **Clear** — `reset_tree(pool)` (handles the `core.variant.defining_haplogroup_id`
+ FK + derived recurrence rows).
+2. **Nodes** → `tree.haplogroup`: `name` = `label` (disambiguated with `NodeN`/top
+ SNP on collision), `haplogroup_type`, `source='decodingus-denovo'`,
+ `confidence_level` from UFBoot, `provenance` = `{ node_id, isogg, markers_matched,
+ markers_expected, support, branch_length, n_mut, n_reversion }`.
+3. **Edges** → `tree.haplogroup_relationship` (parent→child, `source`).
+4. **Variants** → `core.variant` get-or-create **by hs1 coordinate**
+ (`coordinates` = `{build:'chm13v2.0', chrom, position, ancestral, derived}`):
+ reuse the YBrowse/ISOGG catalog row when (chrom,pos,ref,alt) matches so known
+ SNPs keep their `canonical_name`; else create a de-novo-named variant.
+ Link via `tree.haplogroup_variant` (`ancestral_allele`/`derived_allele`);
+ `reversion`/`polarity` → `annotations`.
+5. **Post** — `recompute_backbone`; bump `tree_revision`. (No `reconcile_tilde_twins`,
+ no graft, no rename — all ISOGG-specific.)
+
+Reuses the existing engines: `tree_revision` (cache ETag), `recompute_backbone`,
+and — for phase 3 — `tree_sample` (mig 0037) for the tip leaves.
+
+## Phasing
+
+1. **Y topology + SNPs — DONE.** exporter + loader; 1,204 nodes; validated against
+ `compare/summary.md` anchors (A–T at best-clade F1 ≈ 1.00; R-CTS4466 spine at depth 21).
+2. **mt tree — DONE.** Exporter generalized to `build(chrom)` + `CONFIGS`. mt differs:
+ rooted at the human MRCA `Node1767` (RSRS), the **CHIMP outgroup tip is dropped**,
+ there is **no `internal_node_labels`** (mt clade names `L0`/`H1a1`/`U5b2a1` are
+ the display form, taken verbatim from `chrM.keepset.tsv`), and the tip/conflict
+ TSVs use `mt_haplogroup` columns. **1,765 nodes / 3,344 tips** (after the `n_mut ≥ 1`
+ keep-rule refinement dropped the empty 0-mutation placeholders; was 2,015); catalog has no mt
+ variants so all SNPs mint. Loader uses **`clear_dna(dna)`** (dna-scoped, FKs are
+ NO ACTION → delete dependents first) so **Y and mt coexist**;
+ `tree-init --denovo-mt --apply`. Verified: `H1→H→HV→R→N→L3→…→RSRS`,
+ served at `/api/v1/mt-tree`.
+3. **Sample leaves — DONE.** `tips[]` → get-or-create `core.biosample` **by accession**
+ (deduped across lineages: a male is one biosample with a Y *and* an mt placement)
+ → `tree.haplogroup_sample` under the known `parentNode` (direct placement, not
+ call-resolution). `PRJEB*` cohorts EXTERNAL/public, own genome STANDARD/private.
+ 3,344 biosamples; Y 1,741 + mt 3,344 placements; reuses the mig-0037 leaf machinery
+ (`sample_count` + `…/node/{name}/samples`). WGS229 → `R-S1128`/`U5a1b1`; R-S1128 leaf
+ set = {NA20278, NA20279, WGS229} (matches the SCALEUP.md anchor).
+4. **Curation — DONE (conflicts).** `conflicts[]` → `tree.denovo_conflict` (mig 0039),
+ populated by the loader and replaced per-lineage (cleared by `clear_dna`/`reset_tree`).
+ Read-only Curator queue at **`/curator/denovo-conflicts`** (page + HTMX fragment,
+ lineage filter, worst-magnitude first) via `du_db::denovo::list_conflicts`; dashboard
+ card + i18n (en/es/fr). 88 Y + 37 mt conflicts.
+ **Call mask — deferred.** The chrY Poznik mask is **12,986 fine intervals** — a poor
+ fit for `core.genome_region`'s ~85-row named-region model, and de-novo variants are all
+ *in-mask* by construction (redundant). The right home is a dedicated callable-interval
+ representation / coverage-norm, designed separately.
+
+## Validation gates
+
+- Node + edge counts match the treefile; single root; fully reachable; no
+ multi-parent edges.
+- Macro clades A–T present and monophyletic-ish (F1 ≈ 1.00 per `summary.md`).
+- Spot anchors: `R-M269` clade size ≈ 289; `WGS229` terminal `R-FGC29076` on the
+ L21 path; mt `WGS229 → U5a1b1g`.
+- Defining-SNP reuse rate against the hs1 catalog (how many known vs novel).
+
+## Deferred / open
+
+- **HGDP + SGDP scale-up** — re-export + re-ingest when the fuller tree lands
+ (`SCALEUP.md`); ingestion is a clean replace by design.
+- **Node-name uniqueness policy** — when two de-novo nodes map to the same ISOGG
+ label (finer de-novo splits), the disambiguation rule (suffix `NodeN` vs top
+ SNP) needs a final call during phase 1.
+- **Novel-SNP naming** — convention for de-novo SNPs absent from the catalog
+ (position-based vs node-anchored).
+- **Branch lengths / ages** — `branchLength` is substitutions/site; feeding the
+ branch-age-estimation model is a later concern.
diff --git a/documents/proposals/group-project-system.md b/documents/proposals/group-project-system.md
index aaf8a342..ae935614 100644
--- a/documents/proposals/group-project-system.md
+++ b/documents/proposals/group-project-system.md
@@ -1,9 +1,18 @@
# Proposal: Privacy-First Group Project System
+> **📝 Status (2026-06-07): forward; reconciled by D5.** This is the member-sovereign
+> group-project proposal; `planning/d5-group-project-reconciliation.md` is the
+> authoritative reconciliation with D1–D4 — it adopts this proposal's roles/policies/
+> succession, maps its aggregate records (`projectTreeView`/`projectModal`/
+> `strComparison`) onto D4's rails, makes governance/membership the AppView-enforced
+> `research.project`/`project_member` ACL, and treats the member-sovereign visibility
+> model as the post-claim state. Unbuilt (`social`/`research` placeholder schema).
+> Triage: `triage-report.md` §4.
+
**Status:** Draft
**Author:** DecodingUs Team
**Created:** 2025-12-07
-**Related:** [Atmosphere Lexicon](../Atmosphere_Lexicon.md), [IBD Matching System](../planning/ibd-matching-system.md)
+**Related:** [Atmosphere Lexicon](../Atmosphere_Lexicon.md), IBD: [`d1-encrypted-edge-exchange.md`](../planning/d1-encrypted-edge-exchange.md) + [`d3-ibd-matching-impl.md`](../planning/d3-ibd-matching-impl.md)
## Overview
diff --git a/documents/proposals/haplogroup-tree-merge-api-proposal.md b/documents/proposals/haplogroup-tree-merge-api-proposal.md
deleted file mode 100644
index d666a3f4..00000000
--- a/documents/proposals/haplogroup-tree-merge-api-proposal.md
+++ /dev/null
@@ -1,346 +0,0 @@
-# Haplogroup Tree Merge API Proposal
-
-**Status**: Draft
-**Created**: 2025-12-12
-**Author**: DecodingUs Team
-
----
-
-## Executive Summary
-
-DecodingUs maintains a comprehensive haplogroup tree that serves as a foundation for genetic genealogy research. As the field matures, multiple authoritative sources—ISOGG, ytree.net, academic researchers, and citizen scientists—independently develop and refine portions of the phylogenetic tree. Currently, integrating updates from these sources requires manual curation, which is time-consuming and error-prone.
-
-This proposal introduces an automated Tree Merge API that enables programmatic integration of external haplogroup trees into the DecodingUs baseline. The system is source-agnostic: any researcher or institution can submit tree data through a secured API endpoint, with configurable priority rules determining how conflicts are resolved.
-
-A key design decision is **variant-based matching**. Because different sources use different naming conventions (ytree.net uses "R-L21", ISOGG uses "R1b1a1a2a1a1", DecodingUs uses "R1b-L21"), the merge algorithm matches nodes by their defining genetic variants rather than names. This ensures accurate alignment regardless of nomenclature differences.
-
-The system tracks **multi-source provenance** through a JSONB column storing which sources contributed to each node and variant. ISOGG serves as the authoritative backbone and retains primary credit on existing nodes, while incoming sources receive credit for new discoveries—splits that reveal finer structure and new terminal branches they contribute.
-
-The API supports both full tree replacement and subtree merging under a designated anchor node, with dry-run capability for previewing changes before application. All endpoints are protected by API key authentication, ensuring only authorized integrations can modify tree data.
-
-This infrastructure positions DecodingUs as a collaborative hub for phylogenetic research while preserving attribution for original discoveries and maintaining data integrity through priority-based conflict resolution.
-
----
-
-## Overview
-
-Add API-key protected endpoints for automated haplogroup tree migration from external researcher sources into the DecodingUs baseline tree, with multi-source provenance tracking via JSONB column.
-
-## Design Philosophy
-
-- **DecodingUs is the baseline** - The existing internal tree that external sources merge into
-- **Source-agnostic** - Any researcher or institution can submit trees (e.g., ISOGG, ytree.net, academic researchers, citizen scientists)
-- **Priority ranking retained** - Configurable source priority for conflict resolution
-- **Full attribution** - Track all contributing sources per node and variant
-
-## Credit Assignment Rules
-
-Merges are applied tree-by-tree from a designated anchor node. Credit follows a tiered model:
-
-1. **ISOGG is primary.** Existing nodes with ISOGG credit retain it. ISOGG serves as the authoritative backbone for haplogroup nomenclature.
-
-2. **Incoming source gets credit for new discoveries:**
- - **New splits** - When incoming data reveals finer structure (new intermediate branches), the source gets credit for those split nodes
- - **New terminal branches** - When incoming data adds leaf nodes not in the existing tree, the source gets credit
-
-This ensures ISOGG maintains credit for the established tree structure while researchers who discover new sub-branches or terminal clades receive attribution for their contributions.
-
-The `primaryCredit` field in provenance tracks which source gets discovery attribution, separate from `nodeProvenance` which tracks all contributors.
-
-## Requirements
-
-- **Attribution**: JSONB column for multi-source provenance tracking
-- **Input Format**: Nested JSON tree structure (PhyloNode-like)
-- **Conflict Resolution**: Priority-based (caller-specified source ordering)
-- **Update Modes**: Both subtree anchor and full tree replacement
-
----
-
-## Technical Design
-
-### 1. Database Schema Changes
-
-Add `provenance JSONB` column to `tree.haplogroup` table with GIN index.
-
-```sql
-ALTER TABLE tree.haplogroup ADD COLUMN provenance JSONB;
-CREATE INDEX idx_haplogroup_provenance ON tree.haplogroup USING GIN (provenance);
-```
-
-### 2. Provenance Data Model
-
-```scala
-case class HaplogroupProvenance(
- primaryCredit: String, // Source with discovery credit (applying credit rules)
- nodeProvenance: Set[String], // All sources contributing to node existence
- variantProvenance: Map[String, Set[String]], // Per-variant source attribution
- lastMergedAt: Option[LocalDateTime],
- lastMergedFrom: Option[String]
-)
-```
-
-**Credit assignment:** ISOGG credit is preserved on existing nodes. Incoming source gets `primaryCredit` for new splits and new terminal branches they contribute.
-
-### 3. Provenance JSONB Structure
-
-```json
-{
- "primaryCredit": "ytree.net",
- "nodeProvenance": ["ytree.net", "DecodingUs"],
- "variantProvenance": {
- "M269": ["ytree.net", "DecodingUs"],
- "L21": ["ytree.net"]
- },
- "lastMergedAt": "2025-12-12T10:30:00",
- "lastMergedFrom": "ytree.net"
-}
-```
-
----
-
-## API Design
-
-### Endpoints
-
-| Method | Endpoint | Description |
-|--------|----------|-------------|
-| POST | `/api/v1/manage/haplogroups/merge` | Merge full haplogroup tree |
-| POST | `/api/v1/manage/haplogroups/merge/subtree` | Merge subtree under anchor node |
-| POST | `/api/v1/manage/haplogroups/merge/preview` | Preview merge without applying |
-
-All endpoints are secured with X-API-Key authentication.
-
-### Request Models
-
-**PhyloNodeInput** - Input tree node structure (source-agnostic)
-```scala
-case class PhyloNodeInput(
- name: String,
- variants: List[String] = List.empty,
- formedYbp: Option[Int] = None,
- formedYbpLower: Option[Int] = None,
- formedYbpUpper: Option[Int] = None,
- tmrcaYbp: Option[Int] = None,
- tmrcaYbpLower: Option[Int] = None,
- tmrcaYbpUpper: Option[Int] = None,
- children: List[PhyloNodeInput] = List.empty
-)
-```
-
-**SourcePriorityConfig** - Dynamic priority ordering (caller specifies)
-```scala
-case class SourcePriorityConfig(
- sourcePriorities: List[String], // First = highest priority
- defaultPriority: Int = 100
-)
-```
-
-**ConflictStrategy** - Conflict resolution modes
-- `HigherPriorityWins` - Higher priority source wins conflicts
-- `KeepExisting` - Always keep existing values
-- `AlwaysUpdate` - Always use incoming values
-
-**SubtreeMergeRequest**
-```scala
-case class SubtreeMergeRequest(
- haplogroupType: HaplogroupType, // Y or MT
- anchorHaplogroupName: String, // e.g., "R1b"
- sourceTree: PhyloNodeInput,
- sourceName: String, // Any identifier
- priorityConfig: Option[SourcePriorityConfig] = None,
- conflictStrategy: Option[ConflictStrategy] = None,
- dryRun: Boolean = false
-)
-```
-
-### Response Models
-
-**TreeMergeResponse**
-```scala
-case class TreeMergeResponse(
- success: Boolean,
- message: String,
- statistics: MergeStatistics,
- conflicts: List[MergeConflict] = List.empty,
- errors: List[String] = List.empty
-)
-
-case class MergeStatistics(
- nodesProcessed: Int,
- nodesCreated: Int,
- nodesUpdated: Int,
- nodesUnchanged: Int,
- variantsAdded: Int,
- variantsUpdated: Int,
- relationshipsCreated: Int,
- relationshipsUpdated: Int
-)
-
-case class MergeConflict(
- haplogroupName: String,
- field: String,
- existingValue: String,
- newValue: String,
- resolution: String,
- existingSource: String,
- newSource: String
-)
-```
-
-### API Usage Example
-
-```bash
-# Merge ytree.net tree under R1b anchor - ytree.net gets primary credit (default)
-curl -X POST https://api.decodingus.com/api/v1/manage/haplogroups/merge/subtree \
- -H "X-API-Key: $API_KEY" \
- -H "Content-Type: application/json" \
- -d '{
- "haplogroupType": "Y",
- "anchorHaplogroupName": "R1b",
- "sourceName": "ytree.net",
- "sourceTree": {
- "name": "R1b-L21",
- "variants": ["L21", "S145"],
- "children": [
- {
- "name": "R1b-DF13",
- "variants": ["DF13"],
- "children": []
- }
- ]
- },
- "priorityConfig": {
- "sourcePriorities": ["ytree.net", "DecodingUs"]
- },
- "dryRun": false
- }'
-```
-
----
-
-## Merge Algorithm
-
-### Node Matching Strategy
-
-**Match on variants, not names.** Different sources use different naming schemes:
-- ytree.net: `R-L21`
-- ISOGG: `R1b1a1a2a1a1`
-- DecodingUs: `R1b-L21`
-
-All refer to the same haplogroup defined by variant `L21`. The merge algorithm matches nodes by their defining variants.
-
-### Process Flow
-
-Starting from anchor (e.g., R1b), walk down the tree:
-
-1. **Index existing tree by variant sets** - Build lookup from variant → haplogroup
-2. **For each incoming node**, find matching existing node by variants:
- - Exact match: Same defining variants → merge/update
- - Partial overlap: Shared variants → potential match, check tree position
- - No match: New branch → create
-3. **Assign primary credit** - ISOGG preserved on existing nodes; incoming source credited for new splits and terminal branches
-4. **Merge node data** based on priority config (age estimates, metadata)
-5. **Recurse into children**, maintaining parent-child relationships
-6. Return statistics and conflicts
-
-### Example: Merging ytree.net under R1b
-
-```
-Anchor: R1b (matched by variant M343)
- └─ ytree.net sends: R-L21 [variants: L21, S145]
- └─ Matches existing: R1b-L21 [variants: L21, S145] ✓
- └─ ytree.net sends: R-DF13 [variants: DF13]
- └─ Matches existing: R1b-DF13 [variants: DF13] ✓
- └─ ytree.net sends: R-ZZ123 [variants: ZZ123]
- └─ No match → CREATE new branch
-```
-
-### Handling Branch Splits
-
-As phylogenetic research advances, existing branches often need to be split into finer sub-branches. The merge algorithm detects and handles these splits automatically.
-
-**Split Detection:**
-A split is detected when incoming data introduces intermediate nodes between an existing parent-child relationship. This occurs when:
-1. Incoming tree has a node with variants that are a subset of an existing node's variants
-2. The incoming node positions itself between the existing node and its parent
-3. Some existing children should be reassigned to the new intermediate node
-
-**Split Process:**
-
-```
-BEFORE (DecodingUs tree):
-R1b-L21 [variants: L21, S145, Z290]
- └─ R1b-DF13 [variants: DF13]
- └─ R1b-L513 [variants: L513]
-
-INCOMING (ytree.net):
-R-L21 [variants: L21, S145]
- └─ R-Z290 [variants: Z290] ← NEW intermediate branch
- └─ R-DF13 [variants: DF13]
- └─ R-L513 [variants: L513]
-
-AFTER (merged):
-R1b-L21 [variants: L21, S145] ← Z290 removed, moved to child
- └─ R1b-Z290 [variants: Z290] ← NEW intermediate node created
- └─ R1b-DF13 [variants: DF13] ← Reassigned under Z290
- └─ R1b-L513 [variants: L513] ← Reassigned under Z290
-```
-
-**Split Algorithm:**
-1. **Identify variant redistribution** - Compare incoming node's variants against existing node
-2. **Create intermediate node** - If incoming shows finer structure, create new branch with subset of variants
-3. **Reassign children** - Move existing children under the new intermediate based on incoming tree structure
-4. **Update parent node** - Remove variants that moved to the new intermediate
-5. **Record provenance** - Credit the source that provided the split information
-
-**Conflict Handling:**
-- If split conflicts with existing structure (e.g., would orphan branches), flag for manual review
-- Priority config determines whether to apply split or preserve existing structure
-- Dry-run mode shows proposed splits before application
-
-**Helper methods:**
-- `findByVariants(variants: Set[String]): Option[Haplogroup]` - Lookup existing haplogroup by defining variants
-- `variantOverlap(a: Set[String], b: Set[String]): Double` - Calculate Jaccard similarity for fuzzy matching
-
----
-
-## Implementation Files
-
-| File | Action |
-|------|--------|
-| `conf/evolutions/default/52.sql` | CREATE - Schema migration |
-| `app/models/domain/haplogroups/HaplogroupProvenance.scala` | CREATE - Provenance model |
-| `app/models/domain/haplogroups/Haplogroup.scala` | MODIFY - Add provenance field |
-| `app/models/dal/domain/haplogroups/HaplogroupsTable.scala` | MODIFY - Add column + projection |
-| `app/models/dal/MyPostgresProfile.scala` | MODIFY - Add JSONB type mapper |
-| `app/models/api/haplogroups/TreeMergeModels.scala` | CREATE - API DTOs |
-| `app/repositories/HaplogroupCoreRepository.scala` | MODIFY - Add provenance methods |
-| `app/services/HaplogroupTreeMergeService.scala` | CREATE - Merge service |
-| `app/controllers/HaplogroupTreeMergeController.scala` | CREATE - API controller |
-| `conf/routes` | MODIFY - Add 3 routes |
-| `app/modules/ServicesModule.scala` | MODIFY - Add service binding |
-| `app/api/TreeMergeEndpoints.scala` | CREATE (optional) - Swagger docs |
-
-## Implementation Order
-
-1. Evolution (52.sql)
-2. HaplogroupProvenance.scala
-3. Haplogroup.scala update
-4. HaplogroupsTable.scala update
-5. MyPostgresProfile.scala type mapper
-6. TreeMergeModels.scala
-7. HaplogroupCoreRepository.scala updates
-8. HaplogroupTreeMergeService.scala
-9. HaplogroupTreeMergeController.scala
-10. Routes update
-11. ServicesModule.scala binding
-12. Tapir endpoints (optional)
-
----
-
-## Notes
-
-- `sourceName` field accepts any string identifier (institution, researcher name, project name)
-- `nodeProvenance` in input is optional - defaults to `sourceName`
-- The service applies credit rules automatically based on variant prefixes and ancestry
-- Dry-run mode available for testing merges without applying changes
diff --git a/documents/proposals/pds-workbench-biosample-flow.md b/documents/proposals/pds-workbench-biosample-flow.md
deleted file mode 100644
index 9c8fab4d..00000000
--- a/documents/proposals/pds-workbench-biosample-flow.md
+++ /dev/null
@@ -1,1149 +0,0 @@
-# PDS Workbench Biosample Flow Design
-
-## Overview
-
-This proposal describes a redesigned biosample management flow where researchers use the **Decoding-Us Navigator** desktop application as their primary interface for managing external biosamples, with data flowing naturally through their Personal Data Store (PDS) to the DecodingUs AppView.
-
-### Current State
-
-Today, researchers submit external biosamples via dedicated REST APIs:
-- `POST /api/private/external/biosamples` (traditional biosample API)
-- `POST /api/external-biosamples` (citizen/firehose-aware API)
-
-These APIs require:
-1. Manual JSON payload construction
-2. Direct API authentication
-3. No local preview or validation
-4. No workspace organization
-5. Disconnect between local analysis and remote submission
-
-### Proposed State
-
-Researchers use Navigator's workspace to:
-1. Organize biosamples into projects locally
-2. Import and analyze BAM/CRAM files with full GATK pipeline
-3. Compose biosample metadata with publication linkage
-4. Sync biosamples to their PDS (creating Atmosphere Lexicon records)
-5. DecodingUs AppView automatically ingests via Firehose subscription
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ RESEARCHER WORKFLOW │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ ┌─────────────────────┐ ┌─────────────────────┐ │
-│ │ BAM/CRAM Files │────▶│ Navigator Desktop │ │
-│ │ (Local Analysis) │ │ Application │ │
-│ └─────────────────────┘ └──────────┬──────────┘ │
-│ │ │
-│ ┌──────────▼──────────┐ │
-│ │ Local Workspace │ │
-│ │ - Projects │ │
-│ │ - Biosamples │ │
-│ │ - Analysis Cache │ │
-│ └──────────┬──────────┘ │
-│ │ │
-│ ┌──────────▼──────────┐ │
-│ │ PDS Sync Engine │ │
-│ │ (AT Protocol) │ │
-│ └──────────┬──────────┘ │
-│ │ │
-└─────────────────────────────────────────┼────────────────────────────────┘
- │
- ┌──────────▼──────────┐
- │ Researcher's PDS │
- │ - workspace │
- │ - biosample(s) │
- │ - sequencerun(s) │
- │ - alignment(s) │
- │ - strProfile(s) │
- └──────────┬──────────┘
- │
- ┌──────────▼──────────┐
- │ AT Protocol │
- │ Firehose │
- └──────────┬──────────┘
- │
- ┌──────────▼──────────┐
- │ DecodingUs AppView │
- │ (Backend) │
- └──────────────────────┘
-```
-
----
-
-## Record Flow Mapping
-
-### From Navigator Analysis to Atmosphere Lexicon Records
-
-| Navigator Concept | Atmosphere Record | Notes |
-|:---|:---|:---|
-| Workspace | `workspace` | Root container in PDS |
-| Project | `project` | Aggregates biosamples for research |
-| Biosample | `biosample` | Core sample with donor metadata |
-| Library Analysis | `sequencerun` | From BAM/CRAM header parsing |
-| WGS Metrics | `alignment` | Coverage stats, callable loci |
-| Haplogroup Results | `biosample.haplogroups` | Y-DNA and mtDNA assignments |
-| STR Extraction | `strProfile` | If STR calling enabled |
-| Publication Link | External reference | Via `publication` field in request |
-
-### Analysis-to-Record Mapping
-
-```
-Navigator Analysis Pipeline Atmosphere Records Created
-───────────────────────────── ─────────────────────────────
-
-┌─────────────────────────┐
-│ Import BAM/CRAM │
-│ (drag-drop or picker) │
-└───────────┬─────────────┘
- │
- ▼
-┌─────────────────────────┐ ┌─────────────────────────┐
-│ Library Statistics │────────▶│ sequencerun │
-│ - Platform detection │ │ - platformName │
-│ - Read length │ │ - instrumentModel │
-│ - Insert size │ │ - instrumentId │
-│ - @RG header parsing │ │ - testType │
-└───────────┬─────────────┘ │ - files[] │
- │ └─────────────────────────┘
- ▼
-┌─────────────────────────┐ ┌─────────────────────────┐
-│ WGS Metrics │────────▶│ alignment │
-│ - Mean coverage │ │ - referenceBuild │
-│ - Depth thresholds │ │ - aligner │
-│ - Per-contig stats │ │ - metrics.meanCoverage │
-│ - Callable loci │ │ - metrics.contigs[] │
-└───────────┬─────────────┘ └─────────────────────────┘
- │
- ▼
-┌─────────────────────────┐ ┌─────────────────────────┐
-│ Haplogroup Analysis │────────▶│ biosample.haplogroups │
-│ - Y-DNA tree matching │ │ - yDna.haplogroupName │
-│ - mtDNA tree matching │ │ - yDna.lineagePath[] │
-│ - Private SNP detection │ │ - yDna.privateVariants │
-└───────────┬─────────────┘ │ - mtDna.* │
- │ └─────────────────────────┘
- ▼
-┌─────────────────────────┐ ┌─────────────────────────┐
-│ STR Extraction │────────▶│ strProfile │
-│ (Optional, from WGS) │ │ - markers[] │
-│ - HipSTR/GangSTR │ │ - derivationMethod │
-└─────────────────────────┘ │ - source: WGS_DERIVED │
- └─────────────────────────┘
-```
-
----
-
-## Data Model Extensions
-
-### Local Workspace State (Navigator)
-
-Navigator needs to track sync state for each local entity:
-
-```scala
-case class SyncState(
- atUri: Option[String], // AT URI if synced to PDS
- atCid: Option[String], // Content ID for versioning
- syncStatus: SyncStatus, // Pending, Synced, Modified, Conflict
- lastSyncedAt: Option[Instant],
- localVersion: Int, // Local modification counter
- remoteVersion: Option[Int] // PDS meta.version
-)
-
-enum SyncStatus:
- case NotSynced // Never pushed to PDS
- case Pending // Queued for sync
- case Syncing // Currently uploading
- case Synced // Up to date with PDS
- case Modified // Local changes since last sync
- case Conflict // Both local and remote changed
- case Error // Sync failed
-```
-
-### Biosample Composition Model
-
-Navigator needs a richer model for composing biosamples before sync:
-
-```scala
-case class ComposedBiosample(
- // Core identity
- localId: UUID,
- sampleAccession: String,
- donorIdentifier: Option[String],
-
- // Donor metadata
- description: Option[String],
- sex: Option[BiologicalSex],
- location: Option[GeoCoordinate],
-
- // Analysis results (from Navigator pipeline)
- analysisResults: Option[AnalysisResults],
-
- // Publication linkage
- publication: Option[PublicationInfo],
-
- // Sync state
- syncState: SyncState,
-
- // Project membership (local organization)
- projectIds: Set[UUID]
-)
-
-case class AnalysisResults(
- libraryStats: Option[LibraryStatistics],
- wgsMetrics: Option[WgsMetrics],
- callableLoci: Option[CallableLociSummary],
- yDnaHaplogroup: Option[HaplogroupResult],
- mtDnaHaplogroup: Option[HaplogroupResult],
- strProfile: Option[StrProfile],
- privateSnps: Option[PrivateSnpReport]
-)
-
-case class PublicationInfo(
- doi: Option[String],
- pubmedId: Option[String],
- title: Option[String],
- authors: Option[String],
- year: Option[Int],
- originalHaplogroups: Option[OriginalHaplogroupInfo]
-)
-```
-
----
-
-## Navigator UI Modifications
-
-### 1. Enhanced Workspace View
-
-**Current**: Simple list of projects and biosamples
-**Proposed**: Rich workspace with sync status indicators
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Workspace [↻ Sync All] │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ 🔵 PDS: did:plc:researcher123 Connected ✓ │
-│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │
-│ │
-│ 📁 Viking Age Study (12 samples) [⬆ 3 pending] │
-│ │ │
-│ ├── 🧬 VIK-001 R-Z284 ✓ Synced │
-│ ├── 🧬 VIK-002 I-M253 ⬆ Modified (haplogroup updated) │
-│ ├── 🧬 VIK-003 R-U106 ○ Not synced │
-│ └── ... │
-│ │
-│ 📁 Iron Age Britain (8 samples) [✓ All synced] │
-│ │ │
-│ └── ... │
-│ │
-│ 📁 Unpublished Analysis (draft) [○ Local only] │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### 2. Biosample Composition Panel
-
-New panel for composing biosample metadata before sync:
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Biosample: VIK-003 [Save] [Sync ⬆] │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ ┌─ Identity ─────────────────────────────────────────────────────────┐ │
-│ │ Sample Accession: [VIK-003____________] │ │
-│ │ Donor Identifier: [DONOR-VIK-003______] (optional) │ │
-│ │ Description: [Ancient DNA from Birka burial site_________] │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Donor Metadata ───────────────────────────────────────────────────┐ │
-│ │ Biological Sex: (•) Male ( ) Female ( ) Unknown │ │
-│ │ Location: [59.3369°N, 17.5544°E] 📍 │ │
-│ │ Date Range: [750] to [850] CE │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Analysis Results (from Navigator) ────────────────────────────────┐ │
-│ │ ✓ Library Stats Platform: Illumina NovaSeq │ │
-│ │ ✓ WGS Metrics Coverage: 32.5x │ │
-│ │ ✓ Y-DNA Haplogroup R-U106 (score: 0.97) │ │
-│ │ ✓ mtDNA Haplogroup H1a (score: 0.99) │ │
-│ │ ○ STR Profile [Run STR Extraction] │ │
-│ │ ✓ Private SNPs 3 novel variants detected │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Publication Link (optional) ──────────────────────────────────────┐ │
-│ │ DOI: [10.1038/s41586-024-00001-1] [🔍 Lookup] │ │
-│ │ PubMed: [39012345] │ │
-│ │ Title: Ancient Genomics of Viking Age Scandinavia │ │
-│ │ Authors: Smith et al. │ │
-│ │ │ │
-│ │ Original Haplogroups (from paper): │ │
-│ │ Y-DNA: [R1a1a1_______] mtDNA: [H1a__________] │ │
-│ │ Notes: [Supplementary Table S2, Sample ID: BKA-003] │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Sync Status ──────────────────────────────────────────────────────┐ │
-│ │ Status: ○ Not yet synced to PDS │ │
-│ │ [ Sync to PDS ] [ Preview JSON ] │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### 3. Bulk Import Wizard
-
-For researchers importing multiple samples from a publication:
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Bulk Import Wizard Step 2/4 │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ Publication: 10.1038/s41586-024-00001-1 │
-│ "Ancient Genomics of Viking Age Scandinavia" │
-│ │
-│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │
-│ │
-│ Import CSV with sample metadata: │
-│ │
-│ ┌─────────────────────────────────────────────────────────────────┐ │
-│ │ [sample_metadata.csv] [Browse...] │ │
-│ └─────────────────────────────────────────────────────────────────┘ │
-│ │
-│ Column Mapping: │
-│ ┌────────────────────┬────────────────────────────────────────────┐ │
-│ │ CSV Column │ Maps To │ │
-│ ├────────────────────┼────────────────────────────────────────────┤ │
-│ │ sample_id │ [Sample Accession ▼] │ │
-│ │ sex │ [Biological Sex ▼] │ │
-│ │ lat │ [Latitude ▼] │ │
-│ │ lon │ [Longitude ▼] │ │
-│ │ y_haplogroup │ [Original Y-DNA ▼] │ │
-│ │ mt_haplogroup │ [Original mtDNA ▼] │ │
-│ │ bam_path │ [BAM File Path ▼] │ │
-│ └────────────────────┴────────────────────────────────────────────┘ │
-│ │
-│ Preview (first 5 rows): │
-│ ┌────────┬─────┬─────────┬──────────┬────────────────────────────┐ │
-│ │ ID │ Sex │ Y-Hg │ mt-Hg │ BAM │ │
-│ ├────────┼─────┼─────────┼──────────┼────────────────────────────┤ │
-│ │ VIK-01 │ M │ R-Z284 │ H1a │ /data/viking/VIK-01.bam │ │
-│ │ VIK-02 │ M │ I-M253 │ U5b │ /data/viking/VIK-02.bam │ │
-│ │ VIK-03 │ F │ - │ H1c │ /data/viking/VIK-03.bam │ │
-│ │ VIK-04 │ M │ R-U106 │ K1a │ /data/viking/VIK-04.bam │ │
-│ │ VIK-05 │ M │ N-L550 │ H6a │ /data/viking/VIK-05.bam │ │
-│ └────────┴─────┴─────────┴──────────┴────────────────────────────┘ │
-│ │
-│ [◀ Back] [Next: Analyze ▶]│
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### 4. Sync Status Dashboard
-
-Global view of PDS sync state:
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ PDS Sync Dashboard │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ Connection: did:plc:researcher123 @ bsky.social ✓ Connected │
-│ │
-│ ┌─ Sync Summary ─────────────────────────────────────────────────────┐ │
-│ │ │ │
-│ │ Total Biosamples: 156 │ │
-│ │ ├── ✓ Synced: 142 (91%) │ │
-│ │ ├── ⬆ Pending: 8 (5%) │ │
-│ │ ├── ⚠ Conflicts: 2 (1%) │ │
-│ │ └── ○ Local only: 4 (3%) │ │
-│ │ │ │
-│ │ Last sync: 2025-12-07 14:30:22 UTC │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Pending Changes ──────────────────────────────────────────────────┐ │
-│ │ │ │
-│ │ ☑ VIK-002 Modified: Haplogroup refined R-Z284 → R-Z284>BY3456 │ │
-│ │ ☑ VIK-015 New: Ready for initial sync │ │
-│ │ ☑ VIK-016 New: Ready for initial sync │ │
-│ │ ☐ IAB-003 Modified: Coverage updated (re-analysis) │ │
-│ │ ... │ │
-│ │ │ │
-│ │ [Select All] [Deselect All] [Sync Selected (3) ⬆] │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Conflicts (require resolution) ───────────────────────────────────┐ │
-│ │ │ │
-│ │ ⚠ ANC-007 Local: mtDNA H1a Remote: mtDNA H1a1 (updated by │ │
-│ │ AppView haplogroup refinement) │ │
-│ │ [Keep Local] [Accept Remote] [View Diff] │ │
-│ │ │ │
-│ │ ⚠ ANC-012 Local: deleted Remote: still exists │ │
-│ │ [Confirm Delete] [Restore Local] │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### 5. Publication Lookup Integration
-
-DOI/PubMed lookup with auto-population:
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Publication Lookup │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ Enter DOI or PubMed ID: [10.1038/s41586-024-00001-1____] [🔍 Search] │
-│ │
-│ ┌─ Found Publication ────────────────────────────────────────────────┐ │
-│ │ │ │
-│ │ Title: Ancient Genomics of Viking Age Scandinavia │ │
-│ │ Authors: Smith J, Jones A, Brown B, et al. │ │
-│ │ Journal: Nature (2024) │ │
-│ │ DOI: 10.1038/s41586-024-00001-1 │ │
-│ │ PubMed: 39012345 │ │
-│ │ │ │
-│ │ Abstract: (truncated) │ │
-│ │ We present genome-wide data from 150 ancient individuals from │ │
-│ │ Viking Age Scandinavia, revealing complex patterns of... │ │
-│ │ │ │
-│ │ ┌─ Already in DecodingUs ──────────────────────────────────────┐ │ │
-│ │ │ ✓ This publication exists in our database │ │ │
-│ │ │ Current samples linked: 127 │ │ │
-│ │ │ [View Publication Page] │ │ │
-│ │ └──────────────────────────────────────────────────────────────┘ │ │
-│ │ │ │
-│ │ [Use This Publication] [Cancel] │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
----
-
-## PDS Sync Protocol
-
-### Record Creation Flow
-
-When syncing a new biosample to PDS:
-
-```
-Navigator PDS AppView
-──────── ─── ───────
- │ │ │
- │ 1. Build Atmosphere records │ │
- │ ────────────────────────▶ │ │
- │ │ │
- │ POST com.atproto.repo.createRecord │
- │ collection: com.decodingus.atmosphere.biosample │
- │ ─────────────────────────────▶ │
- │ │ │
- │ ◀─ { uri, cid } │ │
- │ │ │
- │ 2. Store atUri/atCid locally │ │
- │ │ │
- │ │ Firehose event │
- │ │ ─────────────────────────▶ │
- │ │ │
- │ │ Process biosample │
- │ │ Create DB records │
- │ │ Link to publication │
- │ │ Queue haplogroup work │
- │ │ │
-```
-
-### Multi-Record Transaction
-
-A complete biosample with sequence data requires multiple records:
-
-```scala
-// Pseudo-code for sync operation
-def syncBiosampleToPds(biosample: ComposedBiosample): Future[SyncResult] = {
- for {
- // 1. Create sequence run record first (child)
- sequenceRunUri <- createSequenceRunRecord(biosample.analysisResults)
-
- // 2. Create alignment record (grandchild)
- alignmentUri <- createAlignmentRecord(biosample.analysisResults, sequenceRunUri)
-
- // 3. Create STR profile if available
- strProfileUri <- biosample.analysisResults.strProfile match {
- case Some(str) => createStrProfileRecord(str).map(Some(_))
- case None => Future.successful(None)
- }
-
- // 4. Create biosample record with references
- biosampleUri <- createBiosampleRecord(
- biosample,
- sequenceRunRefs = List(sequenceRunUri),
- strProfileRef = strProfileUri
- )
-
- // 5. Update workspace record to include new biosample
- _ <- updateWorkspaceRecord(biosampleUri)
-
- } yield SyncResult.Success(biosampleUri)
-}
-```
-
-### Conflict Resolution Strategy
-
-```scala
-enum ConflictResolution:
- case KeepLocal // Overwrite PDS with local version
- case AcceptRemote // Discard local changes, pull from PDS
- case Merge // Attempt automatic merge (field-level)
- case Manual // Require user intervention
-
-def resolveConflict(
- local: ComposedBiosample,
- remote: AtmosphereBiosample
-): ConflictResolution = {
-
- // AppView-computed fields always win (haplogroup refinement)
- val appViewFields = Set("haplogroups.yDna", "haplogroups.mtDna")
-
- // If only AppView fields changed remotely, merge
- if (remote.meta.lastModifiedField.exists(appViewFields.contains)) {
- ConflictResolution.Merge
- }
- // If local has newer analysis results, prefer local
- else if (local.analysisResults.isDefined &&
- local.syncState.localVersion > remote.meta.version) {
- ConflictResolution.KeepLocal
- }
- // Otherwise require manual resolution
- else {
- ConflictResolution.Manual
- }
-}
-```
-
----
-
-## API Integration
-
-### DecodingUs Backend Changes
-
-The existing `CitizenBiosampleController` and Firehose handler already support this flow. Minor enhancements needed:
-
-1. **Publication Lookup Endpoint** (new)
- ```
- GET /api/publications/lookup?doi={doi}&pubmed={pubmedId}
- ```
- Returns publication metadata for Navigator's lookup feature.
-
-2. **Batch Validation Endpoint** (new)
- ```
- POST /api/external-biosamples/validate
- ```
- Validates a batch of biosample records without creating them.
-
-3. **Sync Status Endpoint** (new)
- ```
- GET /api/external-biosamples/sync-status?atUris[]={uri1}&atUris[]={uri2}
- ```
- Returns current state of biosamples in AppView (for conflict detection).
-
-### Navigator API Client
-
-New module for AT Protocol and DecodingUs API integration:
-
-```scala
-// AT Protocol client for PDS operations
-trait PdsClient {
- def createRecord[T](collection: String, record: T): Future[CreateRecordResponse]
- def updateRecord[T](uri: String, record: T): Future[UpdateRecordResponse]
- def deleteRecord(uri: String): Future[Unit]
- def getRecord[T](uri: String): Future[Option[T]]
- def listRecords[T](collection: String, cursor: Option[String]): Future[ListRecordsResponse[T]]
-}
-
-// DecodingUs API client for auxiliary operations
-trait DecodingUsClient {
- def lookupPublication(doi: Option[String], pubmedId: Option[String]): Future[Option[Publication]]
- def validateBiosamples(biosamples: Seq[BiosampleValidation]): Future[ValidationResult]
- def getSyncStatus(atUris: Seq[String]): Future[Map[String, SyncStatus]]
-}
-```
-
----
-
-## Implementation Phases
-
-### Phase 1: Local Composition (MVP)
-- Biosample composition panel in Navigator
-- Publication lookup integration
-- Local-only save (no PDS sync yet)
-- Export to JSON for manual API submission
-
-### Phase 2: PDS Sync
-- AT Protocol authentication in Navigator
-- Single-record sync (biosample only)
-- Basic conflict detection
-- Sync status indicators in UI
-
-### Phase 3: Full Record Graph
-- Multi-record sync (sequencerun, alignment, strProfile)
-- Workspace record management
-- Bulk sync operations
-- Background sync with retry
-
-### Phase 4: Bidirectional Sync
-- Pull changes from PDS (AppView updates)
-- Automatic conflict resolution for AppView-computed fields
-- Real-time sync status updates
-- Offline queue with eventual consistency
-
----
-
-## Security Considerations
-
-### Authentication Flow
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ OAuth 2.0 + DPoP Flow │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ 1. User clicks "Connect PDS" in Navigator │
-│ 2. Navigator opens browser to PDS authorization URL │
-│ 3. User authenticates with PDS (handle + password or passkey) │
-│ 4. PDS redirects back to Navigator with auth code │
-│ 5. Navigator exchanges code for access token + DPoP key │
-│ 6. Navigator stores refresh token securely (OS keychain) │
-│ 7. Navigator uses access token for API calls │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### Data Privacy
-
-- All genomic data stays local until explicit sync
-- Only Atmosphere record metadata synced to PDS
-- File locations can be local paths (not synced) or remote URLs
-- User controls what gets published to their PDS
-
----
-
-## Benefits
-
-### For Researchers
-1. **Unified workflow**: Analysis and submission in one tool
-2. **Local preview**: Review and validate before publishing
-3. **Batch operations**: Import and sync multiple samples efficiently
-4. **Offline capable**: Work without internet, sync later
-5. **Version control**: Track changes, resolve conflicts
-
-### For DecodingUs
-1. **Reduced API complexity**: Firehose handles all ingestion
-2. **Better data quality**: Navigator validates before sync
-3. **Richer metadata**: Full analysis results included
-4. **Provenance tracking**: Clear audit trail via AT Protocol
-
-### For the Ecosystem
-1. **Data sovereignty**: Researchers own their PDS data
-2. **Interoperability**: Standard AT Protocol records
-3. **Decentralization**: No single point of failure
-4. **Transparency**: Public record of contributions
-
----
-
-## Cross-Researcher Deduplication
-
-### The Problem
-
-Many researchers work with the same canonical datasets:
-- **1000 Genomes Project**: ~3,200 samples widely used in population genetics
-- **Human Genome Diversity Project (HGDP)**: ~900 samples
-- **Simons Genome Diversity Project**: ~300 samples
-- **Ancient DNA publications**: Shared samples across meta-analyses
-
-When multiple researchers sync these samples to their PDS, the AppView receives duplicate records for the same biological sample from different sources.
-
-### Deduplication Model
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ CANONICAL SAMPLE REGISTRY │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ Canonical Sample: HG00096 (1000 Genomes) │
-│ ══════════════════════════════════════════ │
-│ │
-│ ┌─ Authoritative Identity ─────────────────────────────────────────┐ │
-│ │ Canonical Accession: HG00096 │ │
-│ │ Registry: 1000GENOMES │ │
-│ │ ENA Accession: SAMEA3302682 │ │
-│ │ BioSample: SAMN00001598 │ │
-│ └──────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Researcher Contributions ───────────────────────────────────────┐ │
-│ │ │ │
-│ │ did:plc:alice → at://did:plc:alice/.../biosample/hg00096 │ │
-│ │ Analysis: 32x coverage, haplogroup R-L21 │ │
-│ │ Files: local analysis only │ │
-│ │ │ │
-│ │ did:plc:bob → at://did:plc:bob/.../biosample/1kg-hg00096 │ │
-│ │ Analysis: 45x coverage (deep WGS) │ │
-│ │ Files: s3://bob-lab/HG00096.cram │ │
-│ │ │ │
-│ │ did:plc:carol → at://did:plc:carol/.../biosample/hg00096 │ │
-│ │ Analysis: haplogroup R-L21>FT12345 (novel) │ │
-│ │ STR Profile: Y-111 │ │
-│ │ │ │
-│ └───────────────────────────────────────────────────────────────────┘ │
-│ │
-│ ┌─ Merged View (AppView Computed) ─────────────────────────────────┐ │
-│ │ Best Coverage: 45x (from did:plc:bob) │ │
-│ │ Refined Haplogroup: R-L21>FT12345 (from did:plc:carol) │ │
-│ │ STR Profile: Y-111 markers (from did:plc:carol) │ │
-│ │ Contributing Researchers: 3 │ │
-│ └──────────────────────────────────────────────────────────────────┘ │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### Canonical Accession Resolution
-
-The AppView maintains a registry of known canonical sample identifiers:
-
-```scala
-case class CanonicalSampleRegistry(
- registryCode: String, // "1000GENOMES", "HGDP", "SGDP", "ENA", "NCBI"
- pattern: Regex, // Pattern to match accessions
- normalizeFn: String => String // Normalize variations (HG00096 vs hg00096)
-)
-
-val knownRegistries = Seq(
- CanonicalSampleRegistry(
- "1000GENOMES",
- """^(HG|NA)\d{5}$""".r,
- _.toUpperCase
- ),
- CanonicalSampleRegistry(
- "HGDP",
- """^HGDP\d{5}$""".r,
- _.toUpperCase
- ),
- CanonicalSampleRegistry(
- "ENA",
- """^SAM[END]A?\d+$""".r,
- _.toUpperCase
- ),
- CanonicalSampleRegistry(
- "NCBI_BIOSAMPLE",
- """^SAMN\d+$""".r,
- _.toUpperCase
- )
-)
-
-def resolveCanonicalId(sampleAccession: String): Option[CanonicalIdentity] = {
- knownRegistries.collectFirst {
- case reg if reg.pattern.matches(sampleAccession) =>
- CanonicalIdentity(
- registry = reg.registryCode,
- canonicalAccession = reg.normalizeFn(sampleAccession)
- )
- }
-}
-```
-
-### Database Schema for Deduplication
-
-```sql
--- Canonical sample identity (one per biological sample)
-CREATE TABLE canonical_sample (
- id SERIAL PRIMARY KEY,
- registry VARCHAR(50) NOT NULL, -- '1000GENOMES', 'HGDP', 'ENA'
- canonical_accession VARCHAR(255) NOT NULL,
-
- -- Cross-references to other registries
- ena_accession VARCHAR(50),
- ncbi_biosample VARCHAR(50),
-
- -- Merged/computed best values
- best_coverage FLOAT,
- best_coverage_source_at_uri TEXT,
- refined_y_haplogroup TEXT,
- refined_y_haplogroup_source_at_uri TEXT,
- refined_mt_haplogroup TEXT,
- refined_mt_haplogroup_source_at_uri TEXT,
-
- -- Tracking
- contributor_count INT DEFAULT 0,
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW(),
-
- UNIQUE(registry, canonical_accession)
-);
-
--- Link between canonical samples and researcher contributions
-CREATE TABLE canonical_sample_contribution (
- id SERIAL PRIMARY KEY,
- canonical_sample_id INT REFERENCES canonical_sample(id),
-
- -- The researcher's PDS record
- contributor_did TEXT NOT NULL,
- biosample_at_uri TEXT NOT NULL,
- biosample_at_cid TEXT,
-
- -- What this contribution provides
- coverage FLOAT,
- y_haplogroup TEXT,
- mt_haplogroup TEXT,
- has_str_profile BOOLEAN DEFAULT FALSE,
- has_private_variants BOOLEAN DEFAULT FALSE,
-
- -- File availability
- files_accessible BOOLEAN DEFAULT FALSE, -- Can AppView access the files?
-
- created_at TIMESTAMP DEFAULT NOW(),
- updated_at TIMESTAMP DEFAULT NOW(),
-
- UNIQUE(canonical_sample_id, contributor_did)
-);
-
--- Index for fast lookup during Firehose processing
-CREATE INDEX idx_canonical_sample_accession
- ON canonical_sample(registry, canonical_accession);
-```
-
-### Firehose Event Handling with Deduplication
-
-```scala
-def handleBiosampleCreate(event: BiosampleCreateEvent): Future[ProcessingResult] = {
- val biosample = event.record
-
- // 1. Check if this matches a canonical registry
- val canonicalId = resolveCanonicalId(biosample.sampleAccession)
-
- canonicalId match {
- case Some(canonical) =>
- // This is a known canonical sample (1KG, HGDP, etc.)
- handleCanonicalSampleContribution(canonical, biosample, event.citizenDid)
-
- case None =>
- // Novel sample - check for cross-researcher duplicates by other means
- handleNovelSample(biosample, event.citizenDid)
- }
-}
-
-def handleCanonicalSampleContribution(
- canonical: CanonicalIdentity,
- biosample: AtmosphereBiosample,
- contributorDid: String
-): Future[ProcessingResult] = {
-
- for {
- // Find or create canonical sample record
- canonicalSample <- canonicalSampleRepo.findOrCreate(
- canonical.registry,
- canonical.canonicalAccession
- )
-
- // Record this researcher's contribution
- contribution <- contributionRepo.upsert(
- CanonicalSampleContribution(
- canonicalSampleId = canonicalSample.id,
- contributorDid = contributorDid,
- biosampleAtUri = biosample.atUri,
- biosampleAtCid = biosample.meta.atCid,
- coverage = biosample.extractCoverage(),
- yHaplogroup = biosample.haplogroups.flatMap(_.yDna.map(_.haplogroupName)),
- mtHaplogroup = biosample.haplogroups.flatMap(_.mtDna.map(_.haplogroupName)),
- hasStrProfile = biosample.strProfileRef.isDefined,
- hasPrivateVariants = biosample.hasPrivateVariants()
- )
- )
-
- // Recompute merged "best" values
- _ <- recomputeCanonicalSampleMergedValues(canonicalSample.id)
-
- } yield ProcessingResult.CanonicalContribution(
- canonicalSampleId = canonicalSample.id,
- isNewContributor = contribution.isNew,
- improvedFields = contribution.improvements
- )
-}
-```
-
-### Merged Value Computation
-
-When multiple researchers contribute data for the same canonical sample:
-
-```scala
-def recomputeCanonicalSampleMergedValues(canonicalSampleId: Int): Future[Unit] = {
- for {
- contributions <- contributionRepo.findByCanonicalSample(canonicalSampleId)
-
- // Best coverage = highest value
- bestCoverage = contributions
- .filter(_.coverage.isDefined)
- .maxByOption(_.coverage.get)
-
- // Best haplogroup = most refined (deepest tree depth)
- bestYHaplogroup = contributions
- .flatMap(c => c.yHaplogroup.map(h => (c, h)))
- .maxByOption { case (_, hg) => haplogroupTreeDepth(hg) }
-
- bestMtHaplogroup = contributions
- .flatMap(c => c.mtHaplogroup.map(h => (c, h)))
- .maxByOption { case (_, hg) => haplogroupTreeDepth(hg) }
-
- // Update canonical sample with merged values
- _ <- canonicalSampleRepo.update(
- canonicalSampleId,
- CanonicalSampleUpdate(
- bestCoverage = bestCoverage.flatMap(_.coverage),
- bestCoverageSourceAtUri = bestCoverage.map(_.biosampleAtUri),
- refinedYHaplogroup = bestYHaplogroup.map(_._2),
- refinedYHaplogroupSourceAtUri = bestYHaplogroup.map(_._1.biosampleAtUri),
- refinedMtHaplogroup = bestMtHaplogroup.map(_._2),
- refinedMtHaplogroupSourceAtUri = bestMtHaplogroup.map(_._1.biosampleAtUri),
- contributorCount = contributions.map(_.contributorDid).distinct.size
- )
- )
- } yield ()
-}
-```
-
-### Navigator UI: Duplicate Detection
-
-When a researcher imports a sample, Navigator checks for existing canonical samples:
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Import Sample: HG00096 │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ ┌─ Canonical Sample Detected ────────────────────────────────────────┐ │
-│ │ │ │
-│ │ ⚠ This sample exists in the 1000 Genomes Project registry │ │
-│ │ │ │
-│ │ Canonical ID: HG00096 │ │
-│ │ Registry: 1000 Genomes Project (Phase 3) │ │
-│ │ ENA Accession: SAMEA3302682 │ │
-│ │ Population: GBR (British) │ │
-│ │ │ │
-│ │ ┌─ Existing Contributions in DecodingUs ───────────────────────┐ │ │
-│ │ │ 3 researchers have contributed analysis for this sample: │ │ │
-│ │ │ │ │ │
-│ │ │ • Best coverage: 45x (from did:plc:bob) │ │ │
-│ │ │ • Y-DNA: R-L21 (2 contributors agree) │ │ │
-│ │ │ • mtDNA: H1a (3 contributors agree) │ │ │
-│ │ │ • STR Profile: Available (Y-111) │ │ │
-│ │ └───────────────────────────────────────────────────────────────┘ │ │
-│ │ │ │
-│ │ Your contribution will be added to the merged record. │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ What would you like to do? │
-│ │
-│ (•) Add my analysis as a new contribution │
-│ Your haplogroup calls and coverage will be compared with │
-│ existing data. Novel findings (deeper haplogroups, private │
-│ SNPs) will be highlighted. │
-│ │
-│ ( ) Skip this sample (already well-characterized) │
-│ │
-│ ( ) Import anyway as a separate local sample │
-│ (Will not sync to PDS) │
-│ │
-│ [Continue Import] [Cancel] │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### Contribution Value Indicator
-
-Show researchers what value their contribution adds:
-
-```
-┌─────────────────────────────────────────────────────────────────────────┐
-│ Contribution Analysis: HG00096 │
-├─────────────────────────────────────────────────────────────────────────┤
-│ │
-│ Your Analysis Results: │
-│ ┌─────────────────────┬──────────────────┬────────────────────────┐ │
-│ │ Field │ Your Value │ Current Best │ │
-│ ├─────────────────────┼──────────────────┼────────────────────────┤ │
-│ │ Coverage │ 32x │ 45x (did:plc:bob) │ │
-│ │ Y-DNA Haplogroup │ R-L21>FT54321 🆕 │ R-L21 │ │
-│ │ mtDNA Haplogroup │ H1a │ H1a (same) │ │
-│ │ STR Profile │ Y-67 │ Y-111 (more markers) │ │
-│ │ Private Variants │ 2 novel SNPs 🆕 │ None detected │ │
-│ └─────────────────────┴──────────────────┴────────────────────────┘ │
-│ │
-│ ┌─ Contribution Value ───────────────────────────────────────────────┐ │
-│ │ │ │
-│ │ ✓ Your Y-DNA haplogroup is MORE REFINED than current best │ │
-│ │ R-L21 → R-L21>FT54321 (new terminal SNP!) │ │
-│ │ │ │
-│ │ ✓ You discovered 2 NOVEL PRIVATE VARIANTS │ │
-│ │ These will be submitted to the Haplogroup Discovery System │ │
-│ │ │ │
-│ │ ○ Your coverage (32x) does not improve on current best (45x) │ │
-│ │ │ │
-│ │ ○ Your STR profile (Y-67) has fewer markers than current (Y-111) │ │
-│ │ │ │
-│ └────────────────────────────────────────────────────────────────────┘ │
-│ │
-│ [Sync Contribution] [Cancel] │
-│ │
-└─────────────────────────────────────────────────────────────────────────┘
-```
-
-### API Endpoints for Deduplication
-
-```
-# Check if a sample accession is canonical
-GET /api/canonical-samples/lookup?accession={accession}
-Response: {
- "isCanonical": true,
- "registry": "1000GENOMES",
- "canonicalAccession": "HG00096",
- "crossReferences": {
- "ena": "SAMEA3302682",
- "ncbiBiosample": "SAMN00001598"
- },
- "contributorCount": 3,
- "mergedValues": {
- "bestCoverage": 45.0,
- "refinedYHaplogroup": "R-L21",
- "refinedMtHaplogroup": "H1a",
- "hasStrProfile": true
- }
-}
-
-# Get all contributions for a canonical sample
-GET /api/canonical-samples/{registry}/{accession}/contributions
-Response: {
- "canonicalAccession": "HG00096",
- "contributions": [
- {
- "contributorDid": "did:plc:alice",
- "biosampleAtUri": "at://did:plc:alice/.../biosample/hg00096",
- "coverage": 32.0,
- "yHaplogroup": "R-L21>FT54321",
- "mtHaplogroup": "H1a",
- "hasStrProfile": true,
- "hasPrivateVariants": true,
- "contributedAt": "2025-12-07T10:30:00Z"
- },
- // ...
- ]
-}
-
-# Preview contribution value before sync
-POST /api/canonical-samples/preview-contribution
-Request: {
- "sampleAccession": "HG00096",
- "coverage": 32.0,
- "yHaplogroup": "R-L21>FT54321",
- "mtHaplogroup": "H1a",
- "strMarkerCount": 67,
- "privateVariantCount": 2
-}
-Response: {
- "isCanonical": true,
- "improvements": [
- { "field": "yHaplogroup", "current": "R-L21", "yours": "R-L21>FT54321", "isImprovement": true },
- { "field": "privateVariants", "current": 0, "yours": 2, "isImprovement": true }
- ],
- "noChange": [
- { "field": "mtHaplogroup", "value": "H1a" }
- ],
- "notBest": [
- { "field": "coverage", "current": 45.0, "yours": 32.0 },
- { "field": "strMarkerCount", "current": 111, "yours": 67 }
- ]
-}
-```
-
-### Conflict Resolution for Canonical Samples
-
-When contributions disagree:
-
-```scala
-case class HaplogroupDisagreement(
- canonicalSampleId: Int,
- field: String, // "yHaplogroup" or "mtHaplogroup"
- values: Map[String, Set[String]], // haplogroup -> Set of contributor DIDs
- suggestedResolution: Option[String],
- resolutionReason: Option[String]
-)
-
-def detectHaplogroupDisagreements(canonicalSampleId: Int): Future[Seq[HaplogroupDisagreement]] = {
- for {
- contributions <- contributionRepo.findByCanonicalSample(canonicalSampleId)
-
- yHaplogroupGroups = contributions
- .flatMap(c => c.yHaplogroup.map(h => (h, c.contributorDid)))
- .groupBy(_._1)
- .view.mapValues(_.map(_._2).toSet).toMap
-
- yDisagreement = if (yHaplogroupGroups.size > 1) {
- // Check if disagreements are just refinement levels
- val baseHaplogroups = yHaplogroupGroups.keys.map(extractBaseHaplogroup).toSet
- if (baseHaplogroups.size == 1) {
- // All agree on base, just different refinement levels
- val mostRefined = yHaplogroupGroups.keys.maxBy(haplogroupTreeDepth)
- Some(HaplogroupDisagreement(
- canonicalSampleId,
- "yHaplogroup",
- yHaplogroupGroups,
- suggestedResolution = Some(mostRefined),
- resolutionReason = Some("Most refined call, compatible with others")
- ))
- } else {
- // True disagreement - needs manual review
- Some(HaplogroupDisagreement(
- canonicalSampleId,
- "yHaplogroup",
- yHaplogroupGroups,
- suggestedResolution = None,
- resolutionReason = Some("Conflicting base haplogroups - curator review needed")
- ))
- }
- } else None
-
- } yield Seq(yDisagreement, mtDisagreement).flatten
-}
-```
-
----
-
-## Open Questions
-
-1. **File storage**: Should sequence files (BAM/CRAM) be referenced by local path, remote URL, or uploaded to blob storage?
-
-2. **Project visibility**: Should `project` records be public or private by default?
-
-3. **Batch limits**: What's the maximum number of records to sync in one operation?
-
-4. **Offline duration**: How long should Navigator queue changes before warning about potential conflicts?
-
-5. **AppView authority**: Should AppView-computed updates (haplogroup refinement) automatically overwrite local values?
-
-6. **Canonical registry maintenance**: Who maintains the list of known canonical registries (1KG, HGDP, etc.) and their accession patterns?
-
-7. **Contribution attribution**: How should we display multi-researcher contributions on the public biosample page?
-
-8. **Disagreement handling**: When researchers disagree on haplogroup calls, should the AppView auto-resolve or flag for curator review?
-
----
-
-## Related Documents
-
-- [Atmosphere Lexicon Design](../Atmosphere_Lexicon.md) - Record schemas
-- [Group Project System](./group-project-system.md) - Project membership model
-- [Haplogroup Discovery System](../planning/haplogroup-discovery-system.md) - Private variant flow
diff --git a/documents/proposals/triage-report.md b/documents/proposals/triage-report.md
new file mode 100644
index 00000000..9812a765
--- /dev/null
+++ b/documents/proposals/triage-report.md
@@ -0,0 +1,220 @@
+# Proposals ↔ Rust Triage Report
+
+**Started:** 2026-06-07. Companion to `../planning/design-doc-triage-report.md`,
+using the same method: compare each `documents/proposals/` doc against the Rust
+implementation and record a verdict plus a recommended action **to act on later**.
+
+**Verdict legend:** ✅ current · 📝 update doc · 🔧 make code compliant · ⚖️ split ·
+🗑️ remove (superseded/realized/dropped).
+
+## Execution log (2026-06-07)
+
+- **Removed:** #1 variant-schema-simplification, #2 haplogroup-tree-merge-api,
+ #8 pds-workbench-biosample-flow. Inbound refs rewired in `planning/`
+ (haplogroup-discovery-system → "realized in `core.variant`"; design-roadmap
+ Bucket-B list drops pds-workbench).
+- **Headers added** to the kept proposals: #3 (realized; kept for methodology),
+ #4 (forward; reconciled by D5), #5/#6 (forward Bucket B), #7 (deferred). #3's
+ dead `appview-pds-backfeed-system.md` refs and #4's dead `ibd-matching-system.md`
+ ref were rewired.
+- **#7 Patronage kept as deferred** (revive once the platform passes a few hundred active users).
+
+## Status index
+
+| # | Proposal | Verdict | Action |
+|---|----------|---------|--------|
+| 1 | variant-schema-simplification.md | 🗑️ REMOVED (realized) | done |
+| 2 | haplogroup-tree-merge-api-proposal.md | 🗑️ REMOVED (realized + extended) | done |
+| 3 | branch-age-estimation.md | 📝 Keep (realized; methodology ref) | header + ref fix ✓ |
+| 4 | group-project-system.md | 📝 Keep + reconcile via D5 | header + ref fix ✓ |
+| 5 | Messaging_and_Feed_System.md | 📝 Keep + reconcile (forward; no-PII) | header ✓ |
+| 6 | Reputation_System_Implementation.md | 📝 Keep + reconcile (forward) | header ✓ |
+| 7 | Patronage_Donation_System.md | 📝 Keep (deferred — revive at scale) | header ✓ |
+| 8 | pds-workbench-biosample-flow.md | 🗑️ REMOVED (Navigator-side) | done |
+
+---
+
+## 1. variant-schema-simplification.md — 🗑️ REMOVED (realized)
+
+**Compared against:** `core.variant` (mig 0002, universal JSONB coordinates/aliases),
+`du_db::variant`, the YBrowse ingestion pipeline.
+
+**Finding:** the doc is already marked **"✅ Implemented" (2025-12-14)** and is just a
+thin documentation index for the Scala `variant_v2` model. The Rust rewrite realized
+it as `core.variant` (single row per site, JSONB `coordinates`/`aliases`,
+`ON CONFLICT` batch upserts in the YBrowse pipeline). Nothing to keep — it's a
+realized-status pointer with the Scala table name.
+
+**Inbound ref to fix on removal:** `planning/haplogroup-discovery-system.md` cites it
+as the variant-schema prerequisite ("See: documents/proposals/variant-schema-simplification.md").
+Its other links point at `schema/` + `deployment/` guides (separate doc sets, their
+own passes).
+
+**Recommended action:** **remove**; reword the discovery-system prereq line to
+"realized in `core.variant` (mig 0002)" instead of linking this file.
+
+---
+
+## 2. haplogroup-tree-merge-api-proposal.md — 🗑️ REMOVED (realized + extended)
+
+**Compared against:** `/manage/haplogroups/merge` + `/merge/preview`
+(`routes/versioning.rs`), `du_domain::merge` (Identify-Match-Graft),
+`du_db::snp_graft`, `tree.haplogroup.provenance` JSONB, change-sets + `/curator/reviews`.
+
+**Finding:** the proposal's core is **realized**: variant/SNP-based node matching,
+the `provenance` JSONB on `tree.haplogroup` (used for backbone/aliases/credit), the
+`/manage/haplogroups/merge[/preview]` endpoints with dry-run, and split detection.
+The Rust impl **went further**: merges materialize into reviewable **change-sets**
+(not direct writes), and external source trees graft into the **ISOGG foundation**
+via **SNP-anchored grafting** (`du_db::snp_graft`, `--graft`/`--reattach`) with the
+`/curator/reviews` (`wip_*`) resolution flow — documented in `rust/README.md` and
+the reconciled `planning/tree-versioning-system.md` + the curator guide. The
+proposal itself is Scala (Tapir, `evolutions/52.sql`, `app/...`).
+
+**Recommended action:** **remove** — realized and better-documented by the rust
+README (SNP-graft) + tree-versioning-system.md + the curator guide. (Check inbound
+refs on removal.)
+
+---
+
+## 3. branch-age-estimation.md — 📝 Kept (realized; methodology reference)
+
+**Compared against:** mig 0013 (`tree.haplogroup_ancestral_str`) + 0014 (combined
+age), `tree.genealogical_anchor`, `genomics.str_mutation_rate`,
+`genomics.biosample_callable_loci`, `du_db::age` (`combine`,
+`recompute_combined_ages`), `du_db::ystr`, the `branch-age-recompute` job, and
+`/api/v1/haplogroups/{name}/age`.
+
+**Finding:** essentially **fully realized**. **Every** table the proposal designs
+exists (ancestral STR, genealogical anchor, STR mutation rate, per-sample callable
+loci), plus the SNP+STR combined-age compute, the weekly→daily recompute job, and
+the age API. Caveats: the combination is **inverse-variance** (a simplification of
+the doc's full PDF-multiplication `P(t|e)=k∏P(t|eᵢ)`), and genealogical-anchor
+wiring into the combine may be partial. The doc is Scala (`evolutions/48.sql`,
+`BranchAgeEstimationService`, etc.) and **references the now-removed
+`appview-pds-backfeed-system.md`** (lines 21, 204).
+
+**Recommended action:** **remove** (built; the McDonald framework lives in
+`du_db::age`/`ystr` + mig 0013/0014) — **but confirm**, since the doc carries
+non-trivial *scientific methodology reference* (mutation rates, multi-step STR
+frequencies, expected-precision tables, the McDonald port pointers) that isn't
+fully captured in code. If kept instead: add a Rust-status header and drop the two
+backfeed references. *(Outcome: kept, with both fixes applied — see the Execution log.)*
+
+---
+
+## 4. group-project-system.md — 📝 Keep + reconcile via D5
+
+**Compared against:** `d5-group-project-reconciliation.md` (read in full),
+`social` placeholder schema (mig 0009). Unbuilt.
+
+**Finding:** this is the member-sovereign group-project proposal that **D5 exists to
+reconcile** with D1–D4. Per D5: it **supersedes the proposal's governance/membership
+sections** (now the AppView-enforced `research.project`/`project_member` ACL),
+**adopts** its roles/policies/succession, **maps** its aggregate records
+(`projectTreeView`/`projectModal`/`strComparison`) onto D4's R1/R2 rails, and treats
+its member-sovereign visibility model as the **post-claim** state. The platform is
+**forward** (`social` placeholder only). The proposal is AT-Proto/lexicon-focused
+(no Scala). It **links the now-removed `ibd-matching-system.md`** (line 6).
+
+**Recommended action:** **keep** (D5 builds on it; platform unbuilt) — add a header
+pointing to D5 as the authoritative reconciliation and noting it's forward; fix the
+dangling `ibd-matching-system.md` link → D1/D3.
+
+---
+
+## 5. Messaging_and_Feed_System.md — 📝 Keep + reconcile (forward)
+
+**Compared against:** `social.{user_block, conversation, message, feed_post}`
+(mig 0009). Schema present; **zero logic** (only the static `/reputation` page exists).
+
+**Finding:** forward Bucket-B social design (Scala/Slick). The schema is in place but
+the logic is unbuilt. **Reconciliation needed with the no-PII direction:** the proposal stores
+**DMs centrally as plaintext** (`social.message.content`) — that conflicts with the
+"AppView holds no PII" invariant. Under the new model, DMs should ride the **D1
+encrypted relay** (or AT-Proto records), not a central plaintext mailbox; the public
+feed (AT-Proto `feed.post` records + AppView index) is consistent. Also reuses the
+Reputation system (#6) and should reconcile with D4 assertion threads (roadmap).
+
+**Recommended action:** **keep** — add a forward/Bucket-B header noting: schema
+exists (mig 0009), logic unbuilt, refresh Slick→Rust, and **rework DM transport to
+D1/AT-Proto (no central plaintext)**. Build after the social layer is scheduled.
+
+---
+
+## 6. Reputation_System_Implementation.md — 📝 Keep + reconcile (forward)
+
+**Compared against:** `social.{reputation_event_type, reputation_event,
+user_reputation_score}` (mig 0009), the public `/reputation` page (static content,
+no backend).
+
+**Finding:** forward Bucket-B design (Scala/Slick). The schema matches (singularized
+table names) and is in place; the service/guard logic is **unbuilt**. The
+user-facing `/reputation` page already describes the system. Lower priority
+(roadmap: "depends on social being live").
+
+**Recommended action:** **keep** — add a forward/Bucket-B header (schema exists
+mig 0009; logic unbuilt; refresh Slick→Rust). No code action now.
+
+---
+
+## 7. Patronage_Donation_System.md — 📝 Keep (deferred — revive at scale)
+
+**Compared against:** `rust/README.md` (billing not in production today) and the FAQ
+(`/faq` lists a "Patronage Donation System" under sustainability).
+
+**Finding:** a Scala/Play + Stripe donation-tier design. Not in scope for the
+current rewrite, **but explicitly deferred, not dead** — per the owner, patronage/
+billing will likely return to fund infrastructure once the platform crosses ~a few
+hundred active users. The FAQ already names it as the sustainability path. Only
+Scala/payment specifics are stale.
+
+**Recommended action:** **keep** as a deferred proposal — add a light header:
+"deferred; revive when active users cross ~a few hundred; refresh the
+Scala/Play/Stripe specifics to the Rust stack at that time." No code action.
+
+---
+
+## 8. pds-workbench-biosample-flow.md — 🗑️ REMOVED (Navigator-side)
+
+**Compared against:** the Jetstream ingest reality (`fed.biosample` + the du-jobs
+Jetstream consumer), D2 (`research_subject` model), biosample consolidation.
+*(Triaged from the overview + the known ingest model; the doc is 65 KB and was not read in full.)*
+
+**Finding:** this is **predominantly a Navigator (Edge / DUNavigator) design** —
+the desktop workspace, local GATK pipeline, project organization, and PDS sync. Its
+AppView-relevant slice (researchers' biosamples reaching the AppView) is the
+ingest, and that's **realized differently**: the "Current State" REST APIs it cites
+(`POST /api/private/external/biosamples`, `/api/external-biosamples`) are Scala-era
+and **don't exist** — ingest is now the outbound **Jetstream → `fed.biosample`**
+mirror. It also **predates the D2 ResearchSubject / consolidation model**.
+
+**Recommended action:** **remove from this repo** (Navigator-side + superseded for
+the AppView) — **but confirm**: if its Edge-workflow detail still has value, it
+belongs in the **DUNavigator** repo, so consider relocating rather than deleting.
+
+---
+
+## Summary
+
+| Verdict | Proposals |
+|---------|-----------|
+| 🗑️ Removed (realized) | #1 variant-schema-simplification, #2 tree-merge-api |
+| 🗑️ Removed (Navigator-side) | #8 pds-workbench |
+| 📝 Kept (realized; methodology reference) | #3 branch-age-estimation |
+| 📝 Kept + reconcile (forward Bucket B) | #4 group-project (via D5), #5 messaging, #6 reputation |
+| 📝 Kept (deferred — revive at scale) | #7 Patronage |
+
+**Themes**
+- As in the planning set, **no code is wrong** — verdicts are remove (realized/
+ dropped) or keep-and-reconcile (forward).
+- **Realized:** the variant model, the tree-merge API, and branch age are all built
+  (schema + compute + endpoints); #1/#2 were removed, #3 is kept as a methodology reference.
+- **Forward Bucket B (keep):** group-project (D5 reconciles it), messaging,
+ reputation — all have `social`/`research` placeholder schema (mig 0009) but no
+ logic. Headers should mark them forward + reconcile to Rust and the **no-PII /
+ D1–D5** model (esp. messaging: DMs must not be central plaintext).
+- **Confirm before deleting:** #3 (scientific methodology reference) and #8 (large,
+ Navigator-side — relocate vs delete).
+- **Refs to fix on removal:** `planning/haplogroup-discovery-system.md` cites #1;
+  #4 links the removed `ibd-matching-system.md`; check inbound refs for #2/#3/#8.
diff --git a/documents/proposals/variant-schema-simplification.md b/documents/proposals/variant-schema-simplification.md
deleted file mode 100644
index 48b56737..00000000
--- a/documents/proposals/variant-schema-simplification.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Proposal: Variant Schema Simplification (Implemented)
-
-**Status:** ✅ Implemented
-**Date:** 2025-12-14
-
-This proposal has been fully implemented. The documentation has been split into focused guides for operational use.
-
-## Documentation Index
-
-| Topic | Document | Description |
-|-------|----------|-------------|
-| **Schema Design** | [Universal Variant Schema](../schema/universal-variant-schema.md) | Technical reference for the `variant_v2` table, JSONB structures, and multi-reference model. |
-| **Migration** | [Migration Guide](../deployment/variant-migration-guide.md) | Instructions for migrating legacy data to the new schema and dropping old tables. |
-| **Performance** | [Performance Tuning](../deployment/performance-tuning-variant-ingestion.md) | Lessons learned optimizing the GFF ingestion pipeline (batch upserts, indexing). |
-| **Naming** | [Naming Authority](../planning/variant-naming-authority.md) | Workflows for assigning `DU` names to novel variants. |
-
----
-
-## Executive Summary of Changes
-
-The project successfully migrated from a "row-per-reference" model to a **Universal Variant Model** using `variant_v2`.
-
-* **Unified Storage**: A single database row now represents a variant, with coordinates for multiple assemblies (GRCh37, GRCh38, hs1) stored in a `coordinates` JSONB column.
-* **Flexible Aliases**: `aliases` JSONB column replaces the rigid `variant_alias` table.
-* **High-Performance Ingestion**: The `YBrowseVariantIngestionService` uses optimized batch upserts (`INSERT ... ON CONFLICT`) to handle millions of variants efficiently.
-* **Pangenome Ready**: The JSONB coordinate structure allows for future addition of graph-based coordinates without schema changes.
\ No newline at end of file
diff --git a/documents/register.mermaid b/documents/register.mermaid
deleted file mode 100755
index e42590ea..00000000
--- a/documents/register.mermaid
+++ /dev/null
@@ -1,38 +0,0 @@
-sequenceDiagram
- participant R_Edge as "Researcher (JVM Edge App)"
- participant R_PDS as "Researcher's PDS"
- participant ScalaApp as "App Server (Scala/Play)"
- participant MetadataDB as "T4 Metadata DB (DID Registry)"
-
- title PDS Registration and Sync Setup
-
- R_Edge->>R_PDS: 1. Login: com.atproto.server.createSession(handle, password)
- activate R_PDS
- R_PDS-->>R_Edge: 2. Response: Auth Token (R_Token), DID (did:plc:XYZ)
- deactivate R_PDS
-
- R_Edge->>R_PDS: 3. Verify Identity: com.atproto.identity.resolveHandle
- activate R_PDS
- R_PDS-->>R_Edge: 4. Response: DID Document (Confirms PDS Endpoint)
- deactivate R_PDS
-
- R_Edge->>ScalaApp: 5. Registration Request: POST /api/registerPDS(DID, R_Token, PDS_URL)
- activate ScalaApp
-
- ScalaApp->>R_PDS: 6. *Server-Side Verification*: com.atproto.repo.getLatestCommit (Using R_Token)
- activate R_PDS
- R_PDS-->>ScalaApp: 7. Response: Latest Commit CID, Repo Root
- deactivate R_PDS
-
- ScalaApp->>ScalaApp: 8. Validation: Confirm DID is valid and PDS is responsive
-
- ScalaApp->>MetadataDB: 9. Write New DID Record: INSERT(DID, PDS_URL, Initial_Cursor=0)
- activate MetadataDB
- MetadataDB-->>ScalaApp: 10. Success
- deactivate MetadataDB
-
- ScalaApp-->>R_Edge: 11. Final Response: Registration Success
- deactivate ScalaApp
-
- ScalaApp->>ScalaApp: 12. Trigger Internal Notification (e.g., Pekko Pub/Sub)
- Note over ScalaApp, MetadataDB: Rust Sync Cluster detects new entry in Metadata DB (next poll) and begins monitoring.
diff --git a/project/build.properties b/project/build.properties
deleted file mode 100644
index cc68b53f..00000000
--- a/project/build.properties
+++ /dev/null
@@ -1 +0,0 @@
-sbt.version=1.10.11
diff --git a/project/plugins.sbt b/project/plugins.sbt
deleted file mode 100644
index 520ba80a..00000000
--- a/project/plugins.sbt
+++ /dev/null
@@ -1,2 +0,0 @@
-addSbtPlugin("org.playframework" % "sbt-plugin" % "3.0.9")
-addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.2.2")
diff --git a/public/images/decoding_us_logo_placeholder.png b/public/images/decoding_us_logo_placeholder.png
deleted file mode 100644
index f711e6e8..00000000
Binary files a/public/images/decoding_us_logo_placeholder.png and /dev/null differ
diff --git a/public/javascripts/main.js b/public/javascripts/main.js
deleted file mode 100644
index e69de29b..00000000
diff --git a/public/stylesheets/main.css b/public/stylesheets/main.css
deleted file mode 100644
index 6dbd5ef1..00000000
--- a/public/stylesheets/main.css
+++ /dev/null
@@ -1,21 +0,0 @@
-html {
- scroll-behavior: smooth;
-}
-
-body {
- padding-top: 56px;
- padding-bottom: 56px;
-}
-
-/* Override Bootstrap's default pink/cyan code styling to use gray */
-code {
- color: #495057;
- background-color: #f8f9fa;
-}
-
-.logo-placeholder {
- max-width: 900px; /* Increased for more horizontal span */
- display: block;
- margin: 0 auto;
- padding: 20px 0; /* Add some vertical spacing */
-}
\ No newline at end of file
diff --git a/rust/.env.example b/rust/.env.example
new file mode 100644
index 00000000..54e8d2fe
--- /dev/null
+++ b/rust/.env.example
@@ -0,0 +1,83 @@
+# DecodingUs AppView (Rust) — example environment.
+#
+# The app does NOT auto-load this file; it reads variables from the shell. Copy to
+# `.env` (gitignored) and source it, or export the ones you need before running:
+#
+# set -a; . ./.env; set +a
+# cargo run -p du-web
+#
+# Only DATABASE_URL is strictly required to boot; everything else has a safe default
+# or degrades gracefully (the dependent feature is simply disabled when unset).
+
+# ── Core (du-web) ─────────────────────────────────────────────────────────────
+
+# Postgres + PostGIS connection. Required. The dev DB runs via `./scripts/test-db.sh up`.
+DATABASE_URL=postgres://postgres:dev@localhost:5432/decodingus?sslmode=disable
+
+# Cookie/session signing secret. Optional in dev (an insecure default is used), but
+# MUST be set to a strong random value (32+ chars) in production. It is padded/repeated
+# to 64 bytes, so longer is fine.
+APP_SECRET=change-me-to-a-long-random-string-32-plus-chars
+
+# HTTP listen port. Default: 9000.
+PORT=9000
+
+# tracing/log filter (RUST_LOG-style). Default: "info,du_web=debug".
+RUST_LOG=info,du_web=debug
+
+# Public base URL, used to build absolute links. Default: https://decoding-us.com
+DU_BASE_URL=http://localhost:9000
+
+# Directory of vendored static assets. Default: the du-web crate's `assets/` dir
+# (the container image sets this to /app/assets).
+# DU_ASSETS_DIR=/app/assets
+
+# ── AT Protocol OAuth (du-web sign-in) ────────────────────────────────────────
+# Leave OAUTH_BASE_URL unset to disable OAuth entirely (the rest are then ignored).
+
+# Public base URL advertised in the client metadata document.
+# OAUTH_BASE_URL=https://your-appview.example
+# Requested scope. Default: "atproto transition:generic"
+# OAUTH_SCOPE=atproto transition:generic
+# EC private key (PEM) for DPoP / client-assertion signing.
+# OAUTH_EC_KEY=
+
+# Local-dev OAuth helpers (only for testing against a dev PDS):
+# DU_OAUTH_LOOPBACK=http://127.0.0.1:9000/oauth/callback
+# DU_OAUTH_DEV_PDS=https://localhost:2583
+# DU_OAUTH_DEV_RESOLVE=handle.example=did:plc:... # force handle->DID resolution
+# DU_OAUTH_DEV_CA=/path/to/dev-ca.pem # extra CA for the dev PDS
+
+# ── Curator / public forms ────────────────────────────────────────────────────
+
+# API key gating the machine curation-intake endpoint. Unset -> that endpoint is off.
+# DU_CURATION_API_KEY=
+
+# reCAPTCHA for public forms (e.g. suggest-a-paper). Both unset -> captcha disabled.
+# RECAPTCHA_SITE_KEY=
+# RECAPTCHA_SECRET=
+
+# ── Background jobs (du-jobs) ─────────────────────────────────────────────────
+
+# AT Protocol Jetstream firehose for the federated reporting mirror. Unset -> mirror off.
+# JETSTREAM_URL=wss://jetstream2.us-east.bsky.network/subscribe
+# Comma-separated lexicon collections to subscribe to.
+# JETSTREAM_COLLECTIONS=com.decodingus.atmosphere.haplogroupReconciliation,com.decodingus.atmosphere.populationBreakdown
+
+# Variant catalog ingest: path to the YBrowse GFF3 snapshot.
+# YBROWSE_GFF=/path/to/ybrowse.gff3
+# Base URL/dir for the T2T-CHM13 Y-region BED files. Has a built-in default.
+# YREGIONS_BASE=
+
+# ── External APIs (politeness pool + keys) ────────────────────────────────────
+
+# Contact email for the OpenAlex "polite pool" (publication discovery).
+# OPENALEX_MAILTO=you@example.com
+# NCBI E-utilities contact + optional API key (publication enrichment).
+# NCBI_EMAIL=you@example.com
+# NCBI_API_KEY=
+
+# Secret store (du-external): by default, secrets are read from env vars prefixed
+# with SECRET_ (e.g. the secret "api/token" -> SECRET_API_TOKEN); the optional `aws`
+# feature reads from AWS Secrets Manager instead.
+# SECRET_API_TOKEN=
diff --git a/rust/.gitignore b/rust/.gitignore
new file mode 100644
index 00000000..5093373c
--- /dev/null
+++ b/rust/.gitignore
@@ -0,0 +1,7 @@
+/target
+.env
+# SQLx offline query cache is committed once a dev DB exists; ignore until then.
+.DS_Store
+
+# Local OAuth dev stack state (CA, env, PDS data)
+.oauth-dev/
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
new file mode 100644
index 00000000..3522827e
--- /dev/null
+++ b/rust/Cargo.lock
@@ -0,0 +1,5029 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "ahash"
+version = "0.7.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
+dependencies = [
+ "getrandom 0.2.17",
+ "once_cell",
+ "version_check",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anstream"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
+
+[[package]]
+name = "anstyle-parse"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
+dependencies = [
+ "anstyle",
+ "once_cell_polyfill",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+
+[[package]]
+name = "arbitrary"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
+dependencies = [
+ "derive_arbitrary",
+]
+
+[[package]]
+name = "arc-swap"
+version = "1.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207"
+dependencies = [
+ "rustversion",
+]
+
+[[package]]
+name = "argon2"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072"
+dependencies = [
+ "base64ct",
+ "blake2",
+ "cpufeatures 0.2.17",
+ "password-hash",
+]
+
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
+[[package]]
+name = "askama"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b79091df18a97caea757e28cd2d5fda49c6cd4bd01ddffd7ff01ace0c0ad2c28"
+dependencies = [
+ "askama_derive",
+ "askama_escape",
+ "humansize",
+ "num-traits",
+ "percent-encoding",
+]
+
+[[package]]
+name = "askama_derive"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19fe8d6cb13c4714962c072ea496f3392015f0989b1a2847bb4b2d9effd71d83"
+dependencies = [
+ "askama_parser",
+ "basic-toml",
+ "mime",
+ "mime_guess",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "askama_escape"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
+
+[[package]]
+name = "askama_parser"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acb1161c6b64d1c3d83108213c2a2533a342ac225aabd0bda218278c2ddb00c0"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "async-compression"
+version = "0.4.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac"
+dependencies = [
+ "compression-codecs",
+ "compression-core",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "atoi"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
+[[package]]
+name = "autocfg"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
+
+[[package]]
+name = "aws-config"
+version = "1.8.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "517aa062d8bd9015ee23d6daa5e1c1372328412fdae4e6c4c1be9b69c6ad37a2"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-sdk-sso",
+ "aws-sdk-ssooidc",
+ "aws-sdk-sts",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-schema",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "hex",
+ "http 1.4.1",
+ "sha1",
+ "time",
+ "tokio",
+ "tracing",
+ "url",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-credential-types"
+version = "1.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-rs"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ec2f1fc3ec205783a5da9a7e6c1509cc69dedf09a1949e412c1e18469326d00"
+dependencies = [
+ "aws-lc-sys",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-sys"
+version = "0.41.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a2f9779ce85b93ab6170dd940ad0169b5766ff848247aff13bb788b832fe3f4"
+dependencies = [
+ "cc",
+ "cmake",
+ "dunce",
+ "fs_extra",
+]
+
+[[package]]
+name = "aws-runtime"
+version = "1.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ed8e8c52d2dc2390ad9f15647fe663f71e9780b4262c190fbb823a32721566"
+dependencies = [
+ "aws-credential-types",
+ "aws-sigv4",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "bytes-utils",
+ "fastrand",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "percent-encoding",
+ "pin-project-lite",
+ "tracing",
+ "uuid",
+]
+
+[[package]]
+name = "aws-sdk-secretsmanager"
+version = "1.106.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b6fa2aa029a7298bc3d863c253fe6745dac677620f20c337b6ca7cc208f7201"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-observability",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.1",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sdk-sesv2"
+version = "1.121.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8e944a491795ff4c590f16147fa0b05fb118bf3e7983570b04bb4cf5136fefd"
+dependencies = [
+ "arc-swap",
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-observability",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.1",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sdk-sso"
+version = "1.100.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bee2719d4a5e5e147bb9e9b77490df6ece750df1094968aa857b09b618a1881a"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-observability",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.1",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sdk-ssooidc"
+version = "1.102.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b30d254992d56ef19f430396e5765b11e0f5bd21a7a557cb12fca1c8c18b9636"
+dependencies = [
+ "arc-swap",
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-observability",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.1",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sdk-sts"
+version = "1.105.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59f4f8065fe615dbed9096458ba98dda6d641553ffd5aedd27e37e65211aca9f"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-observability",
+ "aws-smithy-query",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-smithy-xml",
+ "aws-types",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.1",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sigv4"
+version = "1.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7083fb918b38474ac65ffbf8a69fc8792d36879f4ac5f1667b43aec61efe9a5"
+dependencies = [
+ "aws-credential-types",
+ "aws-smithy-http",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "crypto-bigint",
+ "form_urlencoded",
+ "hex",
+ "hmac 0.13.0",
+ "http 0.2.12",
+ "http 1.4.1",
+ "p256",
+ "percent-encoding",
+ "sha2 0.11.0",
+ "subtle",
+ "time",
+ "tracing",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-smithy-async"
+version = "1.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc"
+dependencies = [
+ "futures-util",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "aws-smithy-http"
+version = "0.63.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231"
+dependencies = [
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "bytes-utils",
+ "futures-core",
+ "futures-util",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "http-body-util",
+ "percent-encoding",
+ "pin-project-lite",
+ "pin-utils",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-http-client"
+version = "1.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c3ef8931ad1c98aa6a55b4256f847f3116090819844e0dd41ea682cac5dd2d3"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "h2 0.3.27",
+ "h2 0.4.14",
+ "http 0.2.12",
+ "http 1.4.1",
+ "http-body 0.4.6",
+ "hyper 0.14.32",
+ "hyper 1.10.1",
+ "hyper-rustls 0.24.2",
+ "hyper-rustls 0.27.9",
+ "hyper-util",
+ "pin-project-lite",
+ "rustls 0.21.12",
+ "rustls 0.23.40",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls 0.26.4",
+ "tower",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-json"
+version = "0.62.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "701a947f4797e52a911e114a898667c746c39feea467bbd1abd7b3721f702ffa"
+dependencies = [
+ "aws-smithy-runtime-api",
+ "aws-smithy-schema",
+ "aws-smithy-types",
+]
+
+[[package]]
+name = "aws-smithy-observability"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c"
+dependencies = [
+ "aws-smithy-runtime-api",
+]
+
+[[package]]
+name = "aws-smithy-query"
+version = "0.60.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd"
+dependencies = [
+ "aws-smithy-types",
+ "urlencoding",
+]
+
+[[package]]
+name = "aws-smithy-runtime"
+version = "1.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-http",
+ "aws-smithy-http-client",
+ "aws-smithy-observability",
+ "aws-smithy-runtime-api",
+ "aws-smithy-schema",
+ "aws-smithy-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.1",
+ "http-body 0.4.6",
+ "http-body 1.0.1",
+ "http-body-util",
+ "pin-project-lite",
+ "pin-utils",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-runtime-api"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9db177daa6ba8afb9ee1aefcf548c907abcf52065e394ee11a92780057fe0e8c"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-runtime-api-macros",
+ "aws-smithy-types",
+ "bytes",
+ "http 0.2.12",
+ "http 1.4.1",
+ "pin-project-lite",
+ "tokio",
+ "tracing",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-smithy-runtime-api-macros"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "aws-smithy-schema"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5"
+dependencies = [
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "http 1.4.1",
+]
+
+[[package]]
+name = "aws-smithy-types"
+version = "1.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53f93074121a1be41317b9aa607143ae17900631f7f59a99f2b905d519d6783b"
+dependencies = [
+ "base64-simd",
+ "bytes",
+ "bytes-utils",
+ "futures-core",
+ "http 0.2.12",
+ "http 1.4.1",
+ "http-body 0.4.6",
+ "http-body 1.0.1",
+ "http-body-util",
+ "itoa",
+ "num-integer",
+ "pin-project-lite",
+ "pin-utils",
+ "ryu",
+ "serde",
+ "time",
+ "tokio",
+ "tokio-util",
+]
+
+[[package]]
+name = "aws-smithy-xml"
+version = "0.60.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3"
+dependencies = [
+ "xmlparser",
+]
+
+[[package]]
+name = "aws-types"
+version = "1.3.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d16bf10b03a3c01e6b3b7d47cd964e873ffe9e7d4e80fad16bd4c077cb068531"
+dependencies = [
+ "aws-credential-types",
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-schema",
+ "aws-smithy-types",
+ "rustc_version",
+ "tracing",
+]
+
+[[package]]
+name = "axum"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
+dependencies = [
+ "async-trait",
+ "axum-core",
+ "axum-macros",
+ "bytes",
+ "futures-util",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper 1.10.1",
+ "hyper-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "serde_json",
+ "serde_path_to_error",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum-macros"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "base-x"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
+
+[[package]]
+name = "base16ct"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
+
+[[package]]
+name = "base256emoji"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c"
+dependencies = [
+ "const-str",
+ "match-lookup",
+]
+
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
+[[package]]
+name = "base64-simd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195"
+dependencies = [
+ "outref",
+ "vsimd",
+]
+
+[[package]]
+name = "base64ct"
+version = "1.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
+
+[[package]]
+name = "basic-toml"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba62675e8242a4c4e806d12f11d136e626e6c8361d6b829310732241652a178a"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "bcrypt"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b1866ecef4f2d06a0bb77880015fdf2b89e25a1c2e5addacb87e459c86dc67e"
+dependencies = [
+ "base64",
+ "blowfish",
+ "getrandom 0.2.17",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "bitflags"
+version = "2.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "bitvec"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
+[[package]]
+name = "blake2"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
+dependencies = [
+ "digest 0.10.7",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be"
+dependencies = [
+ "hybrid-array",
+]
+
+[[package]]
+name = "blowfish"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e412e2cd0f2b2d93e02543ceae7917b3c70331573df19ee046bcbc35e45e87d7"
+dependencies = [
+ "byteorder",
+ "cipher",
+]
+
+[[package]]
+name = "borsh"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a"
+dependencies = [
+ "borsh-derive",
+ "bytes",
+ "cfg_aliases",
+]
+
+[[package]]
+name = "borsh-derive"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfcfdc083699101d5a7965e49925975f2f55060f94f9a05e7187be95d530ca59"
+dependencies = [
+ "once_cell",
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.20.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
+
+[[package]]
+name = "bytecheck"
+version = "0.6.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2"
+dependencies = [
+ "bytecheck_derive",
+ "ptr_meta",
+ "simdutf8",
+]
+
+[[package]]
+name = "bytecheck_derive"
+version = "0.6.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "bytes"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+
+[[package]]
+name = "bytes-utils"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35"
+dependencies = [
+ "bytes",
+ "either",
+]
+
+[[package]]
+name = "cc"
+version = "1.2.63"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f"
+dependencies = [
+ "find-msvc-tools",
+ "jobserver",
+ "libc",
+ "shlex",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
+[[package]]
+name = "chrono"
+version = "0.4.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
+dependencies = [
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "serde",
+ "wasm-bindgen",
+ "windows-link",
+]
+
+[[package]]
+name = "cipher"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
+dependencies = [
+ "crypto-common 0.1.7",
+ "inout",
+]
+
+[[package]]
+name = "clap"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "clap_lex"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
+
+[[package]]
+name = "cmake"
+version = "0.1.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "cmov"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a"
+
+[[package]]
+name = "colorchoice"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
+
+[[package]]
+name = "compression-codecs"
+version = "0.4.38"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf"
+dependencies = [
+ "compression-core",
+ "flate2",
+ "memchr",
+]
+
+[[package]]
+name = "compression-core"
+version = "0.4.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789"
+
+[[package]]
+name = "concurrent-queue"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "const-oid"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
+
+[[package]]
+name = "const-oid"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c"
+
+[[package]]
+name = "const-str"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3"
+
+[[package]]
+name = "cookie"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747"
+dependencies = [
+ "base64",
+ "hmac 0.12.1",
+ "percent-encoding",
+ "rand 0.8.6",
+ "sha2 0.10.9",
+ "subtle",
+ "time",
+ "version_check",
+]
+
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "cpufeatures"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crc"
+version = "3.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d"
+dependencies = [
+ "crc-catalog",
+]
+
+[[package]]
+name = "crc-catalog"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853"
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-queue"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crypto-bigint"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
+dependencies = [
+ "generic-array",
+ "rand_core 0.6.4",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "crypto-common"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "crypto-common"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453"
+dependencies = [
+ "hybrid-array",
+]
+
+[[package]]
+name = "ctutils"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e"
+dependencies = [
+ "cmov",
+]
+
+[[package]]
+name = "curve25519-dalek"
+version = "4.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.2.17",
+ "curve25519-dalek-derive",
+ "digest 0.10.7",
+ "fiat-crypto",
+ "rustc_version",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "curve25519-dalek-derive"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "data-encoding"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8"
+
+[[package]]
+name = "data-encoding-macro"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3259c913752a86488b501ed8680446a5ed2d5aeac6e596cb23ba3800768ea32c"
+dependencies = [
+ "data-encoding",
+ "data-encoding-macro-internal",
+]
+
+[[package]]
+name = "data-encoding-macro-internal"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090"
+dependencies = [
+ "data-encoding",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "der"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
+dependencies = [
+ "const-oid 0.9.6",
+ "pem-rfc7468",
+ "zeroize",
+]
+
+[[package]]
+name = "deranged"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
+dependencies = [
+ "powerfmt",
+]
+
+[[package]]
+name = "derive_arbitrary"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer 0.10.4",
+ "const-oid 0.9.6",
+ "crypto-common 0.1.7",
+ "subtle",
+]
+
+[[package]]
+name = "digest"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2"
+dependencies = [
+ "block-buffer 0.12.0",
+ "const-oid 0.10.2",
+ "crypto-common 0.2.2",
+ "ctutils",
+]
+
+[[package]]
+name = "displaydoc"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "dotenvy"
+version = "0.15.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
+
+[[package]]
+name = "du-atproto"
+version = "0.1.0"
+source = "git+https://github.com/JamesKane/decodingus-shared.git?rev=f975a0835fe67b1e86d919fcb085fca4db1cb1f4#f975a0835fe67b1e86d919fcb085fca4db1cb1f4"
+dependencies = [
+ "base64",
+ "du-domain",
+ "ed25519-dalek",
+ "multibase",
+ "p256",
+ "rand_core 0.6.4",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "du-bio"
+version = "0.1.0"
+source = "git+https://github.com/JamesKane/decodingus-shared.git?rev=f975a0835fe67b1e86d919fcb085fca4db1cb1f4#f975a0835fe67b1e86d919fcb085fca4db1cb1f4"
+dependencies = [
+ "du-domain",
+ "thiserror 2.0.18",
+]
+
+[[package]]
+name = "du-db"
+version = "0.1.0"
+dependencies = [
+ "chrono",
+ "du-domain",
+ "serde",
+ "serde_json",
+ "sqlx",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "uuid",
+]
+
+[[package]]
+name = "du-domain"
+version = "0.1.0"
+source = "git+https://github.com/JamesKane/decodingus-shared.git?rev=f975a0835fe67b1e86d919fcb085fca4db1cb1f4#f975a0835fe67b1e86d919fcb085fca4db1cb1f4"
+dependencies = [
+ "chrono",
+ "rust_decimal",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "uuid",
+]
+
+[[package]]
+name = "du-external"
+version = "0.1.0"
+dependencies = [
+ "aws-config",
+ "aws-sdk-secretsmanager",
+ "aws-sdk-sesv2",
+ "chrono",
+ "du-domain",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "du-jobs"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "chrono",
+ "du-bio",
+ "du-db",
+ "du-domain",
+ "du-external",
+ "futures-util",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tokio-tungstenite",
+ "tracing",
+ "tracing-subscriber",
+]
+
+[[package]]
+name = "du-migrate"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "du-db",
+ "du-domain",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sqlx",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+ "uuid",
+]
+
+[[package]]
+name = "du-web"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "argon2",
+ "askama",
+ "axum",
+ "base64",
+ "bcrypt",
+ "chrono",
+ "du-atproto",
+ "du-db",
+ "du-domain",
+ "du-external",
+ "ed25519-dalek",
+ "percent-encoding",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "sqlx",
+ "tokio",
+ "tower",
+ "tower-cookies",
+ "tower-http",
+ "tracing",
+ "tracing-subscriber",
+ "utoipa",
+ "utoipa-swagger-ui",
+ "uuid",
+]
+
+[[package]]
+name = "dunce"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
+
+[[package]]
+name = "ecdsa"
+version = "0.16.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
+dependencies = [
+ "der",
+ "digest 0.10.7",
+ "elliptic-curve",
+ "rfc6979",
+ "signature",
+ "spki",
+]
+
+[[package]]
+name = "ed25519"
+version = "2.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
+dependencies = [
+ "pkcs8",
+ "signature",
+]
+
+[[package]]
+name = "ed25519-dalek"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70e796c081cee67dc755e1a36a0a172b897fab85fc3f6bc48307991f64e4eca9"
+dependencies = [
+ "curve25519-dalek",
+ "ed25519",
+ "serde",
+ "sha2 0.10.9",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "either"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "elliptic-curve"
+version = "0.13.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
+dependencies = [
+ "base16ct",
+ "crypto-bigint",
+ "digest 0.10.7",
+ "ff",
+ "generic-array",
+ "group",
+ "pem-rfc7468",
+ "pkcs8",
+ "rand_core 0.6.4",
+ "sec1",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "etcetera"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943"
+dependencies = [
+ "cfg-if",
+ "home",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "event-listener"
+version = "5.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab"
+dependencies = [
+ "concurrent-queue",
+ "parking",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
+
+[[package]]
+name = "ff"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393"
+dependencies = [
+ "rand_core 0.6.4",
+ "subtle",
+]
+
+[[package]]
+name = "fiat-crypto"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"
+
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "flume"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+ "spin",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
+[[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+
+[[package]]
+name = "futures-channel"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-executor"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-intrusive"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f"
+dependencies = [
+ "futures-core",
+ "lock_api",
+ "parking_lot",
+]
+
+[[package]]
+name = "futures-io"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-core",
+ "futures-io",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "slab",
+]
+
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+ "zeroize",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "wasi",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "r-efi 5.3.0",
+ "wasip2",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi 6.0.0",
+ "wasip2",
+ "wasip3",
+]
+
+[[package]]
+name = "group"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
+dependencies = [
+ "ff",
+ "rand_core 0.6.4",
+ "subtle",
+]
+
+[[package]]
+name = "h2"
+version = "0.3.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d"
+dependencies = [
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "futures-util",
+ "http 0.2.12",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "h2"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http 1.4.1",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+dependencies = [
+ "ahash",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
+
+[[package]]
+name = "hashlink"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
+dependencies = [
+ "hashbrown 0.15.5",
+]
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
+[[package]]
+name = "hkdf"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7"
+dependencies = [
+ "hmac 0.12.1",
+]
+
+[[package]]
+name = "hmac"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
+dependencies = [
+ "digest 0.10.7",
+]
+
+[[package]]
+name = "hmac"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f"
+dependencies = [
+ "digest 0.11.3",
+]
+
+[[package]]
+name = "home"
+version = "0.5.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "http"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
+dependencies = [
+ "bytes",
+ "fnv",
+ "itoa",
+]
+
+[[package]]
+name = "http"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0"
+dependencies = [
+ "bytes",
+ "itoa",
+]
+
+[[package]]
+name = "http-body"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
+dependencies = [
+ "bytes",
+ "http 0.2.12",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "http-body"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
+dependencies = [
+ "bytes",
+ "http 1.4.1",
+]
+
+[[package]]
+name = "http-body-util"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "http-range-header"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c"
+
+[[package]]
+name = "httparse"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+
+[[package]]
+name = "httpdate"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+
+[[package]]
+name = "humansize"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7"
+dependencies = [
+ "libm",
+]
+
+[[package]]
+name = "hybrid-array"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da"
+dependencies = [
+ "typenum",
+]
+
+[[package]]
+name = "hyper"
+version = "0.14.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "h2 0.3.27",
+ "http 0.2.12",
+ "http-body 0.4.6",
+ "httparse",
+ "httpdate",
+ "itoa",
+ "pin-project-lite",
+ "socket2 0.5.10",
+ "tokio",
+ "tower-service",
+ "tracing",
+ "want",
+]
+
+[[package]]
+name = "hyper"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "h2 0.4.14",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "httparse",
+ "httpdate",
+ "itoa",
+ "pin-project-lite",
+ "smallvec",
+ "tokio",
+ "want",
+]
+
+[[package]]
+name = "hyper-rustls"
+version = "0.24.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
+dependencies = [
+ "futures-util",
+ "http 0.2.12",
+ "hyper 0.14.32",
+ "log",
+ "rustls 0.21.12",
+ "tokio",
+ "tokio-rustls 0.24.1",
+]
+
+[[package]]
+name = "hyper-rustls"
+version = "0.27.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f"
+dependencies = [
+ "http 1.4.1",
+ "hyper 1.10.1",
+ "hyper-util",
+ "rustls 0.23.40",
+ "rustls-native-certs",
+ "tokio",
+ "tokio-rustls 0.26.4",
+ "tower-service",
+ "webpki-roots 1.0.7",
+]
+
+[[package]]
+name = "hyper-util"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
+dependencies = [
+ "base64",
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "hyper 1.10.1",
+ "ipnet",
+ "libc",
+ "percent-encoding",
+ "pin-project-lite",
+ "socket2 0.6.4",
+ "tokio",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "log",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "icu_collections"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
+dependencies = [
+ "displaydoc",
+ "potential_utf",
+ "utf8_iter",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locale_core"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
+dependencies = [
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
+
+[[package]]
+name = "icu_properties"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
+dependencies = [
+ "icu_collections",
+ "icu_locale_core",
+ "icu_properties_data",
+ "icu_provider",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
+
+[[package]]
+name = "icu_provider"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
+dependencies = [
+ "displaydoc",
+ "icu_locale_core",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "id-arena"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
+
+[[package]]
+name = "idna"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.17.1",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "inout"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "ipnet"
+version = "2.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "jobserver"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
+dependencies = [
+ "getrandom 0.3.4",
+ "libc",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.99"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "once_cell",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+dependencies = [
+ "spin",
+]
+
+[[package]]
+name = "leb128fmt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "libm"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
+
+[[package]]
+name = "libredox"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f02ab6bace2054fb888a3c16f990117b579d14a3088e472d63c6011fa185c9d3"
+dependencies = [
+ "bitflags",
+ "libc",
+ "plain",
+ "redox_syscall 0.8.0",
+]
+
+[[package]]
+name = "libsqlite3-sys"
+version = "0.30.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149"
+dependencies = [
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "litemap"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5"
+
+[[package]]
+name = "lru-slab"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+
+[[package]]
+name = "match-lookup"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "757aee279b8bdbb9f9e676796fd459e4207a1f986e87886700abf589f5abf771"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "matchers"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
+dependencies = [
+ "regex-automata",
+]
+
+[[package]]
+name = "matchit"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
+
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+ "cfg-if",
+ "digest 0.10.7",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8"
+
+[[package]]
+name = "mime"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
+
+[[package]]
+name = "mime_guess"
+version = "2.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
+[[package]]
+name = "mio"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "multibase"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77"
+dependencies = [
+ "base-x",
+ "base256emoji",
+ "data-encoding",
+ "data-encoding-macro",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "nu-ansi-term"
+version = "0.50.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "num-bigint-dig"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7"
+dependencies = [
+ "lazy_static",
+ "libm",
+ "num-integer",
+ "num-iter",
+ "num-traits",
+ "rand 0.8.6",
+ "smallvec",
+ "zeroize",
+]
+
+[[package]]
+name = "num-conv"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441"
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+
+[[package]]
+name = "openssl-probe"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
+
+[[package]]
+name = "outref"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
+
+[[package]]
+name = "p256"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b"
+dependencies = [
+ "ecdsa",
+ "elliptic-curve",
+ "primeorder",
+ "sha2 0.10.9",
+]
+
+[[package]]
+name = "parking"
+version = "2.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall 0.5.18",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "password-hash"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166"
+dependencies = [
+ "base64ct",
+ "rand_core 0.6.4",
+ "subtle",
+]
+
+[[package]]
+name = "pem-rfc7468"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
+dependencies = [
+ "base64ct",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
+[[package]]
+name = "pkcs1"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f"
+dependencies = [
+ "der",
+ "pkcs8",
+ "spki",
+]
+
+[[package]]
+name = "pkcs8"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
+dependencies = [
+ "der",
+ "spki",
+]
+
+[[package]]
+name = "pkg-config"
+version = "0.3.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
+
+[[package]]
+name = "plain"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
+
+[[package]]
+name = "potential_utf"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
+dependencies = [
+ "zerovec",
+]
+
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "primeorder"
+version = "0.13.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6"
+dependencies = [
+ "elliptic-curve",
+]
+
+[[package]]
+name = "proc-macro-crate"
+version = "3.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f"
+dependencies = [
+ "toml_edit",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "ptr_meta"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1"
+dependencies = [
+ "ptr_meta_derive",
+]
+
+[[package]]
+name = "ptr_meta_derive"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "quinn"
+version = "0.11.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
+dependencies = [
+ "bytes",
+ "cfg_aliases",
+ "pin-project-lite",
+ "quinn-proto",
+ "quinn-udp",
+ "rustc-hash",
+ "rustls 0.23.40",
+ "socket2 0.6.4",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-proto"
+version = "0.11.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
+dependencies = [
+ "bytes",
+ "getrandom 0.3.4",
+ "lru-slab",
+ "rand 0.9.4",
+ "ring",
+ "rustc-hash",
+ "rustls 0.23.40",
+ "rustls-pki-types",
+ "slab",
+ "thiserror 2.0.18",
+ "tinyvec",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-udp"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
+dependencies = [
+ "cfg_aliases",
+ "libc",
+ "once_cell",
+ "socket2 0.6.4",
+ "tracing",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "r-efi"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
+
+[[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
+
+[[package]]
+name = "rand"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
+dependencies = [
+ "libc",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
+dependencies = [
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom 0.2.17",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c7591fa2c6b601dfcfe5f043f65a1c39fcdf50efefcd7f1572e538c1f4b398d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-lite"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973"
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+
+[[package]]
+name = "rend"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c"
+dependencies = [
+ "bytecheck",
+]
+
+[[package]]
+name = "reqwest"
+version = "0.12.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
+dependencies = [
+ "base64",
+ "bytes",
+ "futures-core",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper 1.10.1",
+ "hyper-rustls 0.27.9",
+ "hyper-util",
+ "js-sys",
+ "log",
+ "percent-encoding",
+ "pin-project-lite",
+ "quinn",
+ "rustls 0.23.40",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tokio-rustls 0.26.4",
+ "tower",
+ "tower-http",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+ "webpki-roots 1.0.7",
+]
+
+[[package]]
+name = "rfc6979"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
+dependencies = [
+ "hmac 0.12.1",
+ "subtle",
+]
+
+[[package]]
+name = "ring"
+version = "0.17.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "getrandom 0.2.17",
+ "libc",
+ "untrusted",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "rkyv"
+version = "0.7.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1"
+dependencies = [
+ "bitvec",
+ "bytecheck",
+ "bytes",
+ "hashbrown 0.12.3",
+ "ptr_meta",
+ "rend",
+ "rkyv_derive",
+ "seahash",
+ "tinyvec",
+ "uuid",
+]
+
+[[package]]
+name = "rkyv_derive"
+version = "0.7.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "rsa"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d"
+dependencies = [
+ "const-oid 0.9.6",
+ "digest 0.10.7",
+ "num-bigint-dig",
+ "num-integer",
+ "num-traits",
+ "pkcs1",
+ "pkcs8",
+ "rand_core 0.6.4",
+ "signature",
+ "spki",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rust-embed"
+version = "8.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04113cb9355a377d83f06ef1f0a45b8ab8cd7d8b1288160717d66df5c7988d27"
+dependencies = [
+ "rust-embed-impl",
+ "rust-embed-utils",
+ "walkdir",
+]
+
+[[package]]
+name = "rust-embed-impl"
+version = "8.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0902e4c7c8e997159ab384e6d0fc91c221375f6894346ae107f47dd0f3ccaa"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "rust-embed-utils",
+ "syn 2.0.117",
+ "walkdir",
+]
+
+[[package]]
+name = "rust-embed-utils"
+version = "8.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5bcdef0be6fe7f6fa333b1073c949729274b05f123a0ad7efcb8efd878e5c3b1"
+dependencies = [
+ "sha2 0.10.9",
+ "walkdir",
+]
+
+[[package]]
+name = "rust_decimal"
+version = "1.42.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c5108e3d4d903e21aac27f12ba5377b6b34f9f44b325e4894c7924169d06995"
+dependencies = [
+ "arrayvec",
+ "borsh",
+ "bytes",
+ "num-traits",
+ "rand 0.8.6",
+ "rkyv",
+ "serde",
+ "serde_json",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
+
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "rustls"
+version = "0.21.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
+dependencies = [
+ "log",
+ "ring",
+ "rustls-webpki 0.101.7",
+ "sct",
+]
+
+[[package]]
+name = "rustls"
+version = "0.23.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b"
+dependencies = [
+ "aws-lc-rs",
+ "once_cell",
+ "ring",
+ "rustls-pki-types",
+ "rustls-webpki 0.103.13",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9"
+dependencies = [
+ "web-time",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-webpki"
+version = "0.101.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
+dependencies = [
+ "ring",
+ "untrusted",
+]
+
+[[package]]
+name = "rustls-webpki"
+version = "0.103.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
+dependencies = [
+ "aws-lc-rs",
+ "ring",
+ "rustls-pki-types",
+ "untrusted",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "ryu"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "schannel"
+version = "0.1.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "sct"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
+dependencies = [
+ "ring",
+ "untrusted",
+]
+
+[[package]]
+name = "seahash"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
+
+[[package]]
+name = "sec1"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
+dependencies = [
+ "base16ct",
+ "der",
+ "generic-array",
+ "pkcs8",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "security-framework"
+version = "3.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
+dependencies = [
+ "bitflags",
+ "core-foundation",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "semver"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.150"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
+dependencies = [
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "serde_path_to_error"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
+dependencies = [
+ "itoa",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.2.17",
+ "digest 0.10.7",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.2.17",
+ "digest 0.10.7",
+]
+
+[[package]]
+name = "sha2"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4"
+dependencies = [
+ "cfg-if",
+ "cpufeatures 0.3.0",
+ "digest 0.11.3",
+]
+
+[[package]]
+name = "sharded-slab"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
+dependencies = [
+ "lazy_static",
+]
+
+[[package]]
+name = "shlex"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
+dependencies = [
+ "errno",
+ "libc",
+]
+
+[[package]]
+name = "signature"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
+dependencies = [
+ "digest 0.10.7",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "simd-adler32"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
+
+[[package]]
+name = "simdutf8"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "socket2"
+version = "0.5.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "socket2"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "spin"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
+
+[[package]]
+name = "spki"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
+dependencies = [
+ "base64ct",
+ "der",
+]
+
+[[package]]
+name = "sqlx"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc"
+dependencies = [
+ "sqlx-core",
+ "sqlx-macros",
+ "sqlx-mysql",
+ "sqlx-postgres",
+ "sqlx-sqlite",
+]
+
+[[package]]
+name = "sqlx-core"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
+dependencies = [
+ "base64",
+ "bytes",
+ "chrono",
+ "crc",
+ "crossbeam-queue",
+ "either",
+ "event-listener",
+ "futures-core",
+ "futures-intrusive",
+ "futures-io",
+ "futures-util",
+ "hashbrown 0.15.5",
+ "hashlink",
+ "indexmap",
+ "log",
+ "memchr",
+ "once_cell",
+ "percent-encoding",
+ "rustls 0.23.40",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "smallvec",
+ "thiserror 2.0.18",
+ "tokio",
+ "tokio-stream",
+ "tracing",
+ "url",
+ "uuid",
+ "webpki-roots 0.26.11",
+]
+
+[[package]]
+name = "sqlx-macros"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "sqlx-core",
+ "sqlx-macros-core",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "sqlx-macros-core"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b"
+dependencies = [
+ "dotenvy",
+ "either",
+ "heck",
+ "hex",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "sqlx-core",
+ "sqlx-mysql",
+ "sqlx-postgres",
+ "sqlx-sqlite",
+ "syn 2.0.117",
+ "tokio",
+ "url",
+]
+
+[[package]]
+name = "sqlx-mysql"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
+dependencies = [
+ "atoi",
+ "base64",
+ "bitflags",
+ "byteorder",
+ "bytes",
+ "chrono",
+ "crc",
+ "digest 0.10.7",
+ "dotenvy",
+ "either",
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-util",
+ "generic-array",
+ "hex",
+ "hkdf",
+ "hmac 0.12.1",
+ "itoa",
+ "log",
+ "md-5",
+ "memchr",
+ "once_cell",
+ "percent-encoding",
+ "rand 0.8.6",
+ "rsa",
+ "serde",
+ "sha1",
+ "sha2 0.10.9",
+ "smallvec",
+ "sqlx-core",
+ "stringprep",
+ "thiserror 2.0.18",
+ "tracing",
+ "uuid",
+ "whoami",
+]
+
+[[package]]
+name = "sqlx-postgres"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
+dependencies = [
+ "atoi",
+ "base64",
+ "bitflags",
+ "byteorder",
+ "chrono",
+ "crc",
+ "dotenvy",
+ "etcetera",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "hex",
+ "hkdf",
+ "hmac 0.12.1",
+ "home",
+ "itoa",
+ "log",
+ "md-5",
+ "memchr",
+ "once_cell",
+ "rand 0.8.6",
+ "serde",
+ "serde_json",
+ "sha2 0.10.9",
+ "smallvec",
+ "sqlx-core",
+ "stringprep",
+ "thiserror 2.0.18",
+ "tracing",
+ "uuid",
+ "whoami",
+]
+
+[[package]]
+name = "sqlx-sqlite"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea"
+dependencies = [
+ "atoi",
+ "chrono",
+ "flume",
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-intrusive",
+ "futures-util",
+ "libsqlite3-sys",
+ "log",
+ "percent-encoding",
+ "serde",
+ "serde_urlencoded",
+ "sqlx-core",
+ "thiserror 2.0.18",
+ "tracing",
+ "url",
+ "uuid",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
+[[package]]
+name = "stringprep"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+ "unicode-properties",
+]
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
+[[package]]
+name = "subtle"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "sync_wrapper"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
+dependencies = [
+ "futures-core",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
+[[package]]
+name = "thiserror"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl 1.0.69",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl 2.0.18",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "thread_local"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "time"
+version = "0.3.47"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
+dependencies = [
+ "deranged",
+ "itoa",
+ "num-conv",
+ "powerfmt",
+ "serde_core",
+ "time-core",
+ "time-macros",
+]
+
+[[package]]
+name = "time-core"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
+
+[[package]]
+name = "time-macros"
+version = "0.2.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
+dependencies = [
+ "num-conv",
+ "time-core",
+]
+
+[[package]]
+name = "tinystr"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "tokio"
+version = "1.52.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2 0.6.4",
+ "tokio-macros",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tokio-rustls"
+version = "0.24.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
+dependencies = [
+ "rustls 0.21.12",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-rustls"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
+dependencies = [
+ "rustls 0.23.40",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-stream"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
+dependencies = [
+ "futures-util",
+ "log",
+ "rustls 0.23.40",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls 0.26.4",
+ "tungstenite",
+ "webpki-roots 0.26.11",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "1.1.1+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "toml_edit"
+version = "0.25.12+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7"
+dependencies = [
+ "indexmap",
+ "toml_datetime",
+ "toml_parser",
+ "winnow",
+]
+
+[[package]]
+name = "toml_parser"
+version = "1.1.2+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
+dependencies = [
+ "winnow",
+]
+
+[[package]]
+name = "tower"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tokio",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-cookies"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fd0118512cf0b3768f7fcccf0bef1ae41d68f2b45edc1e77432b36c97c56c6d"
+dependencies = [
+ "async-trait",
+ "axum-core",
+ "cookie",
+ "futures-util",
+ "http 1.4.1",
+ "parking_lot",
+ "pin-project-lite",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840"
+dependencies = [
+ "async-compression",
+ "bitflags",
+ "bytes",
+ "futures-core",
+ "futures-util",
+ "http 1.4.1",
+ "http-body 1.0.1",
+ "http-body-util",
+ "http-range-header",
+ "httpdate",
+ "mime",
+ "mime_guess",
+ "percent-encoding",
+ "pin-project-lite",
+ "tokio",
+ "tokio-util",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+ "url",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
+
+[[package]]
+name = "tower-service"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
+
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "log",
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+ "valuable",
+]
+
+[[package]]
+name = "tracing-log"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
+dependencies = [
+ "log",
+ "once_cell",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-subscriber"
+version = "0.3.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319"
+dependencies = [
+ "matchers",
+ "nu-ansi-term",
+ "once_cell",
+ "regex-automata",
+ "sharded-slab",
+ "smallvec",
+ "thread_local",
+ "tracing",
+ "tracing-core",
+ "tracing-log",
+]
+
+[[package]]
+name = "try-lock"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
+
+[[package]]
+name = "tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "data-encoding",
+ "http 1.4.1",
+ "httparse",
+ "log",
+ "rand 0.8.6",
+ "rustls 0.23.40",
+ "rustls-pki-types",
+ "sha1",
+ "thiserror 1.0.69",
+ "utf-8",
+]
+
+[[package]]
+name = "typenum"
+version = "1.20.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
+
+[[package]]
+name = "unicase"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unicode-properties"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
+
+[[package]]
+name = "untrusted"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
+
+[[package]]
+name = "url"
+version = "2.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+ "serde",
+]
+
+[[package]]
+name = "urlencoding"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
+
+[[package]]
+name = "utf-8"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "utoipa"
+version = "5.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bde15df68e80b16c7d16b9616e80770ad158988daa56a27dccd1e55558b0160"
+dependencies = [
+ "indexmap",
+ "serde",
+ "serde_json",
+ "utoipa-gen",
+]
+
+[[package]]
+name = "utoipa-gen"
+version = "5.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ba0b99ee52df3028635d93840c797102da61f8a7bb3cf751032455895b52ef8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "regex",
+ "syn 2.0.117",
+ "uuid",
+]
+
+[[package]]
+name = "utoipa-swagger-ui"
+version = "8.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db4b5ac679cc6dfc5ea3f2823b0291c777750ffd5e13b21137e0f7ac0e8f9617"
+dependencies = [
+ "axum",
+ "base64",
+ "mime_guess",
+ "regex",
+ "rust-embed",
+ "serde",
+ "serde_json",
+ "url",
+ "utoipa",
+ "zip",
+]
+
+[[package]]
+name = "uuid"
+version = "1.23.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7"
+dependencies = [
+ "getrandom 0.4.2",
+ "js-sys",
+ "serde_core",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "valuable"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "vsimd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "want"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
+dependencies = [
+ "try-lock",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasip2"
+version = "1.0.3+wasi-0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
+dependencies = [
+ "wit-bindgen 0.57.1",
+]
+
+[[package]]
+name = "wasip3"
+version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
+dependencies = [
+ "wit-bindgen 0.51.0",
+]
+
+[[package]]
+name = "wasite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "serde",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.72"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "wasm-encoder"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
+dependencies = [
+ "leb128fmt",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasm-metadata"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
+dependencies = [
+ "anyhow",
+ "indexmap",
+ "wasm-encoder",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasmparser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
+dependencies = [
+ "bitflags",
+ "hashbrown 0.15.5",
+ "indexmap",
+ "semver",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.99"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "webpki-roots"
+version = "0.26.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
+dependencies = [
+ "webpki-roots 1.0.7",
+]
+
+[[package]]
+name = "webpki-roots"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d"
+dependencies = [
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "whoami"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d"
+dependencies = [
+ "libredox",
+ "wasite",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-result"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "winnow"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+dependencies = [
+ "wit-bindgen-rust-macro",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
+[[package]]
+name = "wit-bindgen-core"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
+dependencies = [
+ "anyhow",
+ "heck",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-bindgen-rust"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
+dependencies = [
+ "anyhow",
+ "heck",
+ "indexmap",
+ "prettyplease",
+ "syn 2.0.117",
+ "wasm-metadata",
+ "wit-bindgen-core",
+ "wit-component",
+]
+
+[[package]]
+name = "wit-bindgen-rust-macro"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
+dependencies = [
+ "anyhow",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "wit-bindgen-core",
+ "wit-bindgen-rust",
+]
+
+[[package]]
+name = "wit-component"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
+dependencies = [
+ "anyhow",
+ "bitflags",
+ "indexmap",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "wasm-encoder",
+ "wasm-metadata",
+ "wasmparser",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-parser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
+dependencies = [
+ "anyhow",
+ "id-arena",
+ "indexmap",
+ "log",
+ "semver",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "unicode-xid",
+ "wasmparser",
+]
+
+[[package]]
+name = "writeable"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
+
+[[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]
+
+[[package]]
+name = "xmlparser"
+version = "0.13.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
+
+[[package]]
+name = "yoke"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
+dependencies = [
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "synstructure",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+ "synstructure",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
+
+[[package]]
+name = "zerotrie"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.11.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
+[[package]]
+name = "zip"
+version = "2.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
+dependencies = [
+ "arbitrary",
+ "crc32fast",
+ "crossbeam-utils",
+ "displaydoc",
+ "flate2",
+ "indexmap",
+ "memchr",
+ "thiserror 2.0.18",
+ "zopfli",
+]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
+
+[[package]]
+name = "zopfli"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
+dependencies = [
+ "bumpalo",
+ "crc32fast",
+ "log",
+ "simd-adler32",
+]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
new file mode 100644
index 00000000..46bc3eb1
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,104 @@
+# DecodingUs — Rust workspace (rewrite of the Play/Scala 3 app)
+# See /Users/jkane/.claude/plans/robust-knitting-lampson.md
+[workspace]
+resolver = "2"
+members = [
+ "crates/du-db",
+ "crates/du-external",
+ "crates/du-web",
+ "crates/du-jobs",
+ "crates/du-migrate",
+]
+# du-domain, du-atproto, du-bio live in the sibling `decodingus-shared` repo
+# (shared with Navigator) and are pulled in via the git deps below.
+
+[workspace.package]
+version = "0.1.0"
+edition = "2021"
+rust-version = "1.80"
+license = "BSD-3-Clause"
+repository = "https://github.com/decodingus/decodingus"
+
+[workspace.dependencies]
+# Shared crates — git deps pinned to a rev on the decodingus-shared repo (this
+# also unblocks the Docker build, whose context can't reach a sibling path dep).
+# To update: push decodingus-shared, then bump `rev` (or switch to a pushed tag,
+# e.g. `tag = "v0.1.0"`). For local co-dev against working-tree changes, add a
+# [patch] section pointing these back at ../../decodingus-shared/crates/*.
+du-domain = { git = "https://github.com/JamesKane/decodingus-shared.git", rev = "f975a0835fe67b1e86d919fcb085fca4db1cb1f4" }
+du-atproto = { git = "https://github.com/JamesKane/decodingus-shared.git", rev = "f975a0835fe67b1e86d919fcb085fca4db1cb1f4" }
+du-bio = { git = "https://github.com/JamesKane/decodingus-shared.git", rev = "f975a0835fe67b1e86d919fcb085fca4db1cb1f4" }
+# Internal (decodingus-only) crates
+du-db = { path = "crates/du-db" }
+du-external = { path = "crates/du-external" }
+
+# Async runtime + web
+tokio = { version = "1", features = ["full"] }
+axum = { version = "0.7", features = ["macros"] }
+tower = "0.5"
+tower-http = { version = "0.6", features = ["fs", "trace", "compression-gzip", "catch-panic"] }
+tower-cookies = { version = "0.10", features = ["signed"] }
+
+# Database (runtime-checked queries for now; switch to compile-time macros + .sqlx
+# offline cache once a dev DB is reachable — see plan §3 / §9)
+sqlx = { version = "0.8", default-features = false, features = [
+ "runtime-tokio-rustls", "postgres", "uuid", "chrono", "json", "macros", "migrate",
+] }
+
+# Templating (typed, compile-time — Twirl analog)
+askama = "0.12"
+
+# OpenAPI surface (Tapir analog): typed API descriptions + Swagger UI
+utoipa = { version = "5", features = ["axum_extras", "chrono", "uuid"] }
+utoipa-swagger-ui = { version = "8", features = ["axum"] }
+
+# Serialization / JSONB payloads
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+
+# Common types
+uuid = { version = "1", features = ["v4", "serde"] }
+chrono = { version = "0.4", features = ["serde"] }
+rust_decimal = "1"
+
+# HTTP client (external APIs)
+reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+
+# WebSocket client (Jetstream coverage-mirror consumer in du-jobs)
+tokio-tungstenite = { version = "0.24", default-features = false, features = ["connect", "rustls-tls-webpki-roots"] }
+futures-util = { version = "0.3", default-features = false }
+
+# URL/query encoding (language-switcher `next` param)
+percent-encoding = "2"
+
+# AT Protocol identity/crypto
+multibase = "0.9"
+base64 = "0.22"
+sha2 = "0.10"
+p256 = { version = "0.13", features = ["ecdsa"] }
+rand_core = { version = "0.6", features = ["getrandom"] }
+
+# CLI
+clap = { version = "4", features = ["derive"] }
+
+# Errors / logging / config
+thiserror = "2"
+anyhow = "1"
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+
+# Crypto
+argon2 = { version = "0.5", features = ["std"] }
+bcrypt = "0.16"
+ed25519-dalek = "2"
+aes-gcm = "0.10"
+
+# NB: raw-read processing (BAM/CRAM) and variant calling are OUT OF SCOPE for the
+# AppView — Navigator (edge) does local calling and the AppView aggregates the
+# resulting summaries/proposals. du-bio here is text-format + coordinate math
+# (VCF variant ingest, BED callable loci, liftover), so no htslib/noodles dep.
+
+[profile.release]
+lto = "thin"
+codegen-units = 1
+strip = true
diff --git a/rust/Dockerfile b/rust/Dockerfile
new file mode 100644
index 00000000..f97759e4
--- /dev/null
+++ b/rust/Dockerfile
@@ -0,0 +1,51 @@
+# DecodingUs (Rust) — multi-stage build of the web, jobs and migrate binaries.
+# No JRE, no htslib system lib (du-bio is pure-Rust text parsing, no noodles).
+#
+# docker build -t decodingus -f rust/Dockerfile rust
+# (or via compose.yaml)
+
+# ── builder ──────────────────────────────────────────────────────────────────
+FROM rust:1-bookworm AS builder
+WORKDIR /build
+
+# Cache dependencies: copy manifests first, then sources.
+# NB: du-domain/du-atproto/du-bio live in the sibling `decodingus-shared` repo
+# and are pulled in as git deps pinned to a rev (see rust/Cargo.toml), so this
+# build is self-contained: nothing outside this context is needed. If you add a
+# local [patch] pointing them back at ../../decodingus-shared/crates/*, build
+# from a parent context containing both repos instead.
+COPY Cargo.toml Cargo.lock ./
+COPY crates ./crates
+COPY migrations ./migrations
+# SQLx is built in offline mode in CI/Docker (no DB at build time). Once a dev DB
+# exists, commit the `.sqlx/` cache (a workspace-root one needs its own COPY here).
+ENV SQLX_OFFLINE=true
+RUN cargo build --release --bin decodingus --bin decodingus-jobs --bin decodingus-migrate
+
+# ── runtime ──────────────────────────────────────────────────────────────────
+FROM debian:bookworm-slim AS runtime
+# curl for the healthcheck; ca-certificates for outbound TLS (OpenAlex/ENA/AWS).
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends ca-certificates curl \
+ && rm -rf /var/lib/apt/lists/*
+
+RUN groupadd -r decodingus && useradd -r -g decodingus decodingus
+WORKDIR /app
+
+COPY --from=builder /build/target/release/decodingus /usr/local/bin/decodingus
+COPY --from=builder /build/target/release/decodingus-jobs /usr/local/bin/decodingus-jobs
+COPY --from=builder /build/target/release/decodingus-migrate /usr/local/bin/decodingus-migrate
+# Vendored static assets + migrations shipped alongside the binary. (Askama
+# templates and locale catalogs are embedded into the binary at compile time.)
+COPY --chown=decodingus:decodingus crates/du-web/assets ./assets
+COPY --chown=decodingus:decodingus migrations ./migrations
+
+USER decodingus
+EXPOSE 9000
+ENV PORT=9000
+ENV DU_ASSETS_DIR=/app/assets
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=20s --retries=3 \
+ CMD curl -fsS http://localhost:9000/health || exit 1
+
+CMD ["decodingus"]
diff --git a/rust/Makefile b/rust/Makefile
new file mode 100644
index 00000000..e8aaf23a
--- /dev/null
+++ b/rust/Makefile
@@ -0,0 +1,37 @@
+# DecodingUs (Rust) dev tasks.
+.PHONY: help test db-up db-down oauth-up oauth-web oauth-dev oauth-env oauth-down
+
+help:
+ @echo "Targets:"
+ @echo " test cargo test --workspace (set DATABASE_URL for live-DB tests)"
+ @echo " db-up start the dev Postgres (prints DATABASE_URL)"
+ @echo " db-down stop the dev Postgres"
+ @echo " oauth-dev bring up the local OAuth stack (PDS+Caddy+PG) AND run du-web"
+ @echo " oauth-up bring up the OAuth stack only (PDS+Caddy+PG, CA, test account)"
+ @echo " oauth-web run du-web on the host with the dev OAuth env"
+ @echo " oauth-env print the generated dev env"
+ @echo " oauth-down stop+remove PDS+Caddy"
+
+test:
+ cargo test --workspace
+
+db-up:
+ @./scripts/test-db.sh up
+
+db-down:
+ @./scripts/test-db.sh down
+
+# One command: stack up, then du-web (Ctrl-C stops du-web; `make oauth-down` clears containers).
+oauth-dev: oauth-up oauth-web
+
+oauth-up:
+ @./scripts/oauth-dev.sh up
+
+oauth-web:
+ @./scripts/oauth-dev.sh web
+
+oauth-env:
+ @./scripts/oauth-dev.sh env
+
+oauth-down:
+ @./scripts/oauth-dev.sh down
diff --git a/rust/README.md b/rust/README.md
new file mode 100644
index 00000000..c40d5de8
--- /dev/null
+++ b/rust/README.md
@@ -0,0 +1,367 @@
+# DecodingUs — Rust port
+
+A rewrite of the DecodingUs platform (originally Play Framework / Scala 3) in
+Rust. It coexists with the Scala app during the transition and replaces it at a
+single cutover. The Rust app is the **AppView**: a curated Y/mtDNA phylogenetic
+catalog, a public read surface + JSON API, the curator tooling, and
+privacy-preserving federated *reporting* (it aggregates, it does not analyze).
+
+**Status:** the spine is done — redesigned schema, data layer, public HTML/HTMX +
+JSON API, auth + the full curator suite, tree versioning + merge + SNP-graft, the
+multi-source tree build (ISOGG + decoding-us + FTDNA, Y + mt), the production ETL,
+the YBrowse ingestion pipeline, the variant naming authority, and the federated
+reporting mirror. Workspace builds clean; live-DB integration tests (gated on
+`DATABASE_URL`) + unit tests pass. The data cutover is **verified end-to-end** on a
+real prod dump (ETL `--skip-tree` + tree-init); what's left is executing it and the
+live AT-Protocol OAuth handshake (verified to consent locally; confidential
+round-trip is an Edge joint test), plus the re-scoped federation subsystems (IBD
+coordination, social, sequencer-lab inference). See [Roadmap](#roadmap). A living,
+detailed status lives in [`STATUS.md`](STATUS.md); the feature-by-feature
+comparison with the Scala app is in [`docs/scala-vs-rust-diff.md`](docs/scala-vs-rust-diff.md).
+
+---
+
+## Why the rewrite
+
+- Drop the JVM's memory/startup overhead for a single static binary.
+- Replace a sprawling, accreted schema (~84 tables across 6 schemas + a second
+ "metadata" DB) with a de-sprawled design that leans on Postgres **JSONB** for
+ document-shaped data.
+- Run fully **Docker-less for local dev/test** on Apple Silicon via Apple's
+ `container` CLI, while remaining Docker-deployable for production.
+
+## Stack
+
+| Concern | Choice |
+|---|---|
+| Web | **Axum** 0.7 (+ tower / tower-http / tower-cookies) |
+| Templates | **Askama** (compile-time typed, Twirl analog) |
+| Frontend | **HTMX** 2 + Bootstrap 5 (vendored), HATEOAS-first |
+| Database | **SQLx** 0.8 (Postgres, runtime-checked queries) |
+| Genomics | `du-bio` — coordinate math + text parsing (VCF / BED callable-loci / UCSC-chain liftover); the YBrowse GFF3 parser lives in `du-jobs`. Raw reads (BAM/CRAM) + calling are out of scope (done in Navigator) |
+| Async | **tokio** |
+| Auth | AT Protocol OAuth (PKCE/DPoP/`private_key_jwt`); legacy Argon2 sessions for dev/curator seeding |
+| External | OpenAlex, ENA, NCBI/PubMed; AWS SES + Secrets Manager behind the `aws` feature; reCAPTCHA |
+| i18n | embedded `key=value` catalogs (en/es/fr) |
+| Local Postgres | Apple `container` running `imresamu/postgis` (arm64) |
+
+## Workspace layout
+
+Shared crates live in the sibling **`decodingus-shared`** repo (also consumed by
+Navigator), pulled in as **git deps pinned to a rev** in `Cargo.toml` (so the
+Docker build needs no sibling path). To co-develop locally, add a `[patch]`
+pointing the three deps back at the sibling working tree.
+
+```
+github.com/JamesKane/decodingus-shared (separate repo)
+ du-domain/ pure types/enums/IDs + JSONB payload structs + the merge algorithm, no IO
+ du-atproto/ AT Protocol identity/crypto + OAuth client (PKCE/DPoP/metadata)
+ du-bio/ genomics: callable-loci (BED), liftover (UCSC chain), VCF reader
+
+rust/ (this repo — AppView/server-specific)
+ crates/
+ du-db/ SQLx pool + per-aggregate query modules + versioning/merge/graft/naming/ybrowse engines
+ du-external/ OpenAlex / ENA / NCBI / AWS SES / Secrets clients
+ du-web/ Axum app: routes, Askama templates, i18n, HTMX, auth, OAuth, JSON API
+ du-jobs/ tokio scheduler + scheduled jobs + the Jetstream reporting-mirror consumer
+ du-migrate/ legacy → new-schema ETL + the `decodingus-tree-init` tree builder (ISOGG/decoding-us/FTDNA graft, Y + mt)
+ migrations/ redesigned schema (0001–0022)
+ locales/ en / es / fr message catalogs
+ docs/ STATUS pointers, Scala↔Rust diff, AT-Proto OAuth findings
+ scripts/ test-db.sh (Apple container), mock-legacy.sql
+ Dockerfile, compose.yaml, .env.example
+```
+
+## Schema redesign (`migrations/0001`–`0022`)
+
+Postgres schemas: `core`, `tree`, `genomics`, `pubs`, `ident`, `fed`, `ibd`,
+`social`, `support`, `billing`, `source`. Key de-sprawl moves:
+
+- **3 biosample tables → 1** `core.biosample` (a `source` enum discriminator +
+ `source_attrs` JSONB).
+- **Deprecated child tables folded into JSONB** on their parents (variant aliases
+ & coordinates, sequence-file checksums/locations, alignment coverage, original
+ haplogroups, per-revision tree metadata).
+- The legacy second **"metadata" database collapses into the `fed` schema**.
+- Scattered `at_uri`/`at_cid` columns → one consistent **`atproto` JSONB** column.
+- **Tree** is temporal: no `parent_id`; hierarchy lives in
+ `tree.haplogroup_relationship` with bitemporal `valid_from`/`valid_until`.
+- **Universal variant model**: one `core.variant` per physical SNP **site**;
+ `canonical_name` (nullable — unnamed variants are identified by coordinates),
+ `aliases`/`coordinates`/`evidence` JSONB, `naming_status`, `mutation_type`.
+ Coordinates carry `ancestral`/`derived` (the reference genome ≠ phylogenetic
+ root); recurrence (homoplasy) is modeled per-link on `tree.haplogroup_variant`
+ (`ancestral_allele`/`derived_allele`), so forward / back-mutation / recurrent
+ occurrences are representable (mig 0021).
+- PostGIS (`geometry(Point,4326)`), `citext`, native enums, GIN/GiST/expression
+ indexes on queried JSONB paths.
+
+## What's implemented
+
+### Public surface (server-rendered, HTMX, i18n en/es/fr)
+
+| Area | Routes |
+|---|---|
+| Home / about / contact / reputation / terms / privacy / cookies / FAQ | `/` `/about` `/contact` `/reputation` `/terms` `/privacy` `/cookies` `/faq` |
+| Per-sample report (public, `is_public`-gated) | `/sample/:slug` (identity, Y/mt haplogroup pathways, origin map, sequencing/coverage, ancestry; curator `is_public` toggle) |
+| Variant browser | `/variants` (+ fragments; JSONB alias/rs-id search) |
+| Y/MT tree — two SVG cladograms (horizontal + vertical) | `/ytree` `/mtree` (breadcrumb re-root, orientation toggle, name/variant search, SNP sidebar, backbone/recent coloring) |
+| References + per-publication biosamples; suggest-a-paper | `/references` (+ report), `/references/submit` (public DOI → candidate queue) |
+| Biosample map (PostGIS → Leaflet GeoJSON) | `/biosamples/map` `/biosamples/geo-data` |
+| Coverage benchmarks + per-lab drill-down | `/coverage-benchmarks` `/coverage/labs` |
+| Profile (view + display-name update); contact (reCAPTCHA) | `/profile` `/contact` |
+| sitemap / robots / health; cookie-consent banner | `/sitemap.xml` `/robots.txt` `/health` `/cookie-consent` |
+| Public JSON API + OpenAPI 3 / Swagger UI | `/api`, `/api/v1/*` (see below) |
+
+### Public JSON API (`/api/v1/*`, OpenAPI at `/api`)
+
+Y/MT tree, coverage benchmarks, references + biosamples, biosample studies,
+variant search/detail/by-haplogroup, variant **CSV** + **GFF3** export, genome
+regions, STR signature + prediction, branch age, the **per-sample report**
+(`/samples/:slug`), and federated population **reports**
+(`/reports/{coverage,ancestry,haplogroups}`).
+
+### Auth & curator
+
+AT Protocol **OAuth** (`/login/atproto`, dev public-client path); legacy
+signed-cookie sessions for dev. `Curator` RBAC guard. The curator dashboard
+(`/curator`) links a full suite:
+
+| Tool | Route | What |
+|---|---|---|
+| Haplogroups | `/curator/haplogroups` | CRUD + structural ops: **reparent / merge-into-parent / split** (direct temporal edits, cycle/name guards) |
+| Variants | `/curator/variants` | CRUD; alias/coordinate JSONB editing |
+| Genome regions | `/curator/regions` | CRUD (coordinates/properties JSONB) |
+| Curation proposals | `/curator/proposals` | review/promote Navigator-submitted branch proposals → catalog |
+| Publication candidates | `/curator/publications` | review OpenAlex discoveries → promote to references |
+| Change-sets | `/curator/change-sets` | tree-versioning lifecycle + diff + per-change review/apply |
+| Merge review | `/curator/reviews` | resolve SNP-graft flags / merge ambiguities via the `wip_*` staging tables (accept-anchor / reparent / merge / defer) |
+| Variant naming | `/curator/naming` | the **DU naming authority**: queue + mint `DUxxxxx` + lifecycle |
+| Reconcile flags | `/curator/reconcile-flags` | merge YBrowse synonym clusters split across catalog variants |
+
+A separate **management API** for machine/curator callers lives under
+`/manage/*` (change-set lifecycle, `/manage/haplogroups/merge[/preview]`,
+`/manage/curation/proposals` X-API-Key intake) — deliberately outside the public
+`/api/v1`.
+
+### Tree versioning, merge & SNP-graft
+
+- **Change-set** lifecycle (DRAFT → READY_FOR_REVIEW → UNDER_REVIEW →
+ APPLIED/DISCARDED), per-change review, diff, and a temporal apply engine
+ (CREATE/UPDATE/DELETE/REPARENT/VARIANT_EDIT) — including a **WIP pass** that
+ enacts curator merge-review resolutions.
+- **Tree merge** (Identify-Match-Graft) — a pure `du-domain::merge`
+ re-implementation against curated fixtures (the legacy was buggy):
+ subtree-scoped matching, ambiguity-flagged-not-guessed, materialized into a
+ reviewable change-set.
+- **SNP-anchored graft** (`du-db::snp_graft`) — reconciles external source trees
+ (decoding-us, **FTDNA**) into the **ISOGG foundation** by SNP plurality: enrich
+ matches, graft truly-novel branches, flag the rest for curator review. A
+ `--reattach` mode anchors FTDNA's complete-topology "bushes" via vetted MATCH
+ dispositions when their backbone ancestor is flagged. A **recurrent-link scrub**
+ (`scrub_recurrent_links`) prunes homoplasic / ASR-scatter defining-variant links
+ to each variant's primary lineage. The **mtDNA tree** is FTDNA-only (single RSRS
+ root), loaded as its own foundation (`--ftdna-foundation`). Result: a single-root
+ Y tree (ISOGG-named backbone + decoding-us + full FTDNA depth) + an RSRS-rooted
+ mt tree.
+
+### Variant Naming Authority
+
+DecodingUs owns the `DU` Y-variant prefix. `core.du_variant_name_seq` +
+`core.next_du_name()`, a curator naming queue (`UNNAMED`→`PENDING_REVIEW`→`NAMED`,
+mint-on-assign with same-coordinate dedup), and a GFF3 propagation export
+(`/api/v1/variants/export.gff`).
+
+### YBrowse ingestion (the central authority document)
+
+`snps_hg38.gff3` (~3M SNP lines, full snapshot, no deltas) is streamed into the
+verbatim **`source.ybrowse_snp` mirror**; `du-db::ybrowse::reconcile` then
+*derives* `core.variant` so curator decisions survive every re-ingest:
+synonyms fold deterministically (strand-canonical key; INDELs VCF-trim-normalized;
+MNPs left intact), existing catalog variants match by name **or coordinate** and
+are enriched in place (canonical/`naming_status` locked), and clusters split
+across multiple existing rows are flagged for `/curator/reconcile-flags`.
+
+### Federation (outbound, summaries only)
+
+A long-lived **Jetstream consumer** mirrors Navigator's published anonymized
+computed-summary records into dedicated `fed.*` reporting tables (PII-bearing
+records keep typed anonymized columns only). Reports aggregate via query-time SQL.
+The inbound credential-holding firehose / PDS-fleet model is **dropped**;
+curators submit branch proposals through the machine-auth intake endpoint.
+
+### Genomics, STR & age
+
+`du-bio` (BED callable-loci, UCSC chain liftover, VCF). Y-STR per-branch modal
+signatures + STR→branch prediction + STR-variance age; a combined branch-age
+estimate (McDonald 2021: SNP-Poisson + STR + genealogical/aDNA anchor terms,
+combined as a PDF product; inverse-variance fallback) gap-filling `tmrca_ybp`.
+
+### Scheduled jobs (`du-jobs`)
+
+`db-heartbeat`, `ybrowse-variant-ingest` (GFF3 → mirror → reconcile),
+`publication-update` (OpenAlex), `publication-discovery`,
+`publication-pubmed-update` (NCBI), `ena-study-enrichment`,
+`branch-age-recompute` (STR + combined age) — plus the Jetstream reporting-mirror
+consumer. Error-isolated; each registers only when its env config is present.
+
+### ETL (`du-migrate`)
+
+Legacy → new schema, preserving PKs and `sample_guid` so FKs carry over 1:1;
+idempotent; runs target migrations then the transformers + a reconciliation pass.
+Covers the full production surface — catalog (donors, biosamples, variants, tree,
+studies, publications), ident/auth (users, RBAC, AT-Protocol OAuth/PDS, consent,
+audit), and genomics (labs, instruments, test types, libraries/files, alignment +
+pangenome coverage, genotype data, pangenome graph). Validated against a real
+production dump — **all aggregates reconcile**. `--skip-tree` omits the legacy
+haplogroup tree (it's built ISOGG-founded by `decodingus-tree-init` instead;
+biosamples carry their haplogroup names as JSON and resolve at read time).
+`decodingus-tree-init` builds the Y tree (ISOGG foundation + graft decoding-us +
+graft FTDNA + scrub) and the mt tree (FTDNA foundation).
+
+## Getting started
+
+### Prerequisites
+
+- Rust (stable) — `cargo`.
+- A container runtime for the local database. `test-db.sh` prefers **Apple
+ `container`** (Docker-less, the default on Apple Silicon) and falls back to
+ **Docker** only if Apple `container` isn't installed. With Apple `container`,
+ set it up once (and again whenever the service is stopped):
+ ```sh
+ container system start # starts the service; installs the Linux kernel on first run
+ ```
+ (Already running Postgres elsewhere? Set `DATABASE_URL` and `test-db.sh` uses
+ it as-is, starting no container at all.)
+
+### Run the app
+
+```sh
+# Start Postgres (PostGIS) and print the DATABASE_URL to export:
+eval "$(./scripts/test-db.sh up)" # Apple container gives it its own IP
+
+# Run the web server (connects + applies migrations on startup):
+DATABASE_URL=... APP_SECRET=<32+ chars> cargo run -p du-web # serves on :9000 (PORT to change)
+```
+
+Apple `container` assigns each container its own IP (no `localhost` port
+forwarding), so `test-db.sh` discovers it and emits the right `DATABASE_URL`
+(e.g. `postgres://postgres:dev@192.168.64.2:5432/decodingus`). Stop it with
+`./scripts/test-db.sh down`.
+
+> **Gotcha:** if a *committed* migration changes, recreate the dev DB — SQLx
+> errors on a checksum mismatch.
+
+### Seed a curator (to use the curator tools)
+
+```sh
+HASH=$(cargo run -q -p du-web --bin decodingus -- hash-password 'yourpassword')
+# then insert ident.users + ident.user_login_info(provider_id='credentials',
+# provider_key='', password_hash=$HASH) + ident.user_roles('TreeCurator').
+```
+
+## Testing
+
+```sh
+eval "$(./scripts/test-db.sh up)"
+cargo test --workspace
+```
+
+Integration tests are gated on `DATABASE_URL`: with it set they run against the
+live PostGIS (migrations, JSONB round-trips, query modules, the apply/merge/graft/
+reconcile engines); without it they skip and the suite stays green. The i18n test
+enforces that es/fr cover every English key.
+
+## Running the ETL
+
+The production source is a self-managed Postgres on EC2.
+
+```sh
+decodingus-migrate \
+ --legacy "postgres://user:pass@ec2-host:5432/decodingus?sslmode=require" \
+ --target "$DATABASE_URL" # runs transformers + reconciliation
+
+decodingus-migrate --legacy ... --target ... --verify # counts only
+decodingus-migrate --legacy ... --target ... --skip-tree # skip the tree (build via tree-init)
+```
+
+For the cutover, run **`--skip-tree` first, then `decodingus-tree-init`** (below):
+the tree is built ISOGG-founded rather than migrated, and `tree-init`'s foundation
+load needs the tree namespace empty.
+
+> ⚠️ The transformer `SELECT`s encode the production column layout — validate
+> against the live EC2 schema (or a current-schema dump) before the production run.
+
+## Seeding / ingesting the tree & variants
+
+```sh
+# Y tree: ISOGG foundation → graft decoding-us → graft FTDNA (reattach) → scrub:
+decodingus-tree-init --isogg /path/isogg_full_tree.json --apply
+decodingus-tree-init --merge-prod https://decoding-us.com/api/v1/y-tree --snp-graft --graft --apply
+decodingus-tree-init --ftdna /path/ftdna_ytree.json --graft --reattach --apply
+decodingus-tree-init --scrub-recurrent --apply
+# mt tree: FTDNA is the sole source (single RSRS root) — load as the foundation:
+decodingus-tree-init --ftdna /path/ftdna_mttree.json --ftdna-foundation --dna MT --apply
+# (any step without --apply is a dry-run; --stage-review routes flags to /curator/reviews)
+
+# YBrowse variant ingest (mirror + reconcile); deploy-time, large file:
+YBROWSE_GFF=/path/snps_hg38.gff3 [YBROWSE_CHAIN_GRCH37=… YBROWSE_CHAIN_HS1=…] cargo run -p du-jobs
+```
+
+## Deploy
+
+Multi-stage `Dockerfile` builds the `decodingus`, `decodingus-jobs` and
+`decodingus-migrate` binaries onto a slim runtime (no JRE, no C deps);
+`compose.yaml` runs it with `postgis/postgis`. `SQLX_OFFLINE=true` for DB-less
+builds. The shared crates are git deps (no sibling path needed in the context).
+
+## Roadmap
+
+**Done** (✅): redesigned schema + temporal tree; `du-db` aggregates; public read
+surface + JSON API + OpenAPI; auth + the full curator suite; tree versioning +
+merge + SNP-graft + curator merge-review; the **multi-source tree build**
+(ISOGG-founded Y + decoding-us + FTDNA graft/reattach + recurrent-link scrub; mt
+tree from FTDNA) + ancestral-state / recurrence modeling; the variant naming
+authority; YBrowse GFF3 ingestion (mirror + reconcile, synonym/strand/INDEL
+handling); federated reporting mirror + reports; STR signature/prediction +
+combined branch age; `du-bio` core; the scheduled-job suite; the full production
+ETL (verified against a real prod dump, `--skip-tree` cutover option); shared
+crates extracted to `decodingus-shared` (git deps); the public **per-sample report**
+(`/sample/:slug`, `is_public` gate, mig 0022) over a unified core+fed read model;
+static/footer pages reconciled with the legacy content (App Passwords removed).
+
+**Remaining, in scope** (⬜):
+
+- [ ] **AT-Protocol OAuth — live handshake.** Client wiring is built and verified
+ to the consent page against a local PDS; the confidential
+ `private_key_jwt` round-trip is the **Edge joint test** (see
+ `docs/atproto-oauth-findings.md`).
+- [ ] **ETL cutover — execution.** ETL + `--skip-tree` verified end-to-end against
+ a real prod dump; what's left is running it for real (freeze prod read-only →
+ dump → prepare locally → ship to AWS → flip) and **alias-aware
+ name-resolution** for biosample→haplogroup (mt has no ISOGG-style alias
+ source; ~15% of mt names need PhyloTree-version mapping).
+- [ ] **IBD matching** — the AppView is the only component that can spot IBD
+ *introduction candidates* across the federation (mine `fed.*` → dual-consent
+ → coordinate the Edge hand-off → persist match state). Placeholder tables
+ (`ibd`); logic forward.
+- [ ] **Social layer** — messaging/consent threads, notifications, blocks, public
+ feed, reputation, group projects (`social`). Underpins IBD consent/notify and
+ stands alone; logic forward.
+- [ ] **Sequencer-lab inference** — instrument-ID → lab lookup API (lets Edge nodes
+ skip a data-entry step) + consensus discovery + curator review. Lab tables
+ exist; logic forward.
+- [ ] **Discovery automation** — the curator review/promote half is built; the
+ automated half (private-variant capture, consensus, auto-reassignment) is
+ forward work.
+- [ ] **Multi-test-type completion** — taxonomy + chip ingest exist; marker
+ coverage / confidence scoring tables are forward work.
+- [ ] **Region management API + bootstrap-from-CHM13** (the S3/CHM13 pipeline; the
+ region CRUD UI already exists).
+
+**Out of scope / not in production** (➖): inbound PDS firehose + fleet,
+patronage/billing, manual sample ingestion (Navigator does it), AppView→PDS
+backfeed (superseded by the outbound mirror / notify-fetch direction), server-side
+BAM/CRAM. Several have placeholder tables but no logic. (IBD, social, and
+sequencer-lab inference were **re-scoped IN** — the AppView is their federation
+coordinator.)
diff --git a/rust/STATUS.md b/rust/STATUS.md
new file mode 100644
index 00000000..c310ad8c
--- /dev/null
+++ b/rust/STATUS.md
@@ -0,0 +1,735 @@
+# DecodingUs Rust rewrite — status & handoff
+
+Living snapshot of the Play/Scala 3 → Rust port. Pairs with `README.md` (roadmap).
+Last updated **2026-06-11** (this session: **the McDonald-2021 branch-age model built
+out end-to-end** + the **T2T/Hallast Y reference-region pipeline** — all post-launch
+catalog refinement, no change to the launch-critical path):
+- **Y reference-region ingest** (`du-jobs/yregions`, `run-once yregions`) — loads
+ T2T-CHM13v2.0 Y structural BEDs (AZF/DYZ, **amplicons v2**, **inverted-repeats v2**,
+ chrXY sequence-class) into `core.genome_region` via `genome_region::upsert_by_key`
+ + `prune_source_orphans` (full-snapshot sync). `du_db::variant::refresh_region_overlaps`
+ flags low-confidence-for-placement variants (`annotations.region_overlaps`), consumed
+ by `snp_graft` (`UnreliableAnchor` → curator review). (memory `yregions-ingest`.)
+- **PDF branch-age engine** (`du_db::pdf`) — discretized age PDFs (poisson / gaussian /
+ **mixture**, `multiply`=Eq 1, `convolve`=Eq 7, `gaussian_on`/`poisson_on` grid-param),
+ replacing the inverse-variance shortcut.
+- **SNP age = bottom-up tree propagation** (`age::propagate`, Eq 5–8, `HET_MASK`).
+ **STR age = multi-step `P(g|m)`** (McDonald **Table 1** embedded + ω convolution
+ fallback, `ystr`) → per-marker Poisson mixture → **tree-propagated**
+ (`ystr::propagate_str` + §2.5.2 ancestral-motif reconstruction), retiring the
+ star-phylogeny pooling.
+- **COMBINED = direct PDF product** of the SNP / STR / genealogical terms (Eq 1) on a
+ shared TREE grid — non-Gaussian shape preserved; disjoint terms fall back to the
+ inverse-variance combine.
+- **Hallast 2026 incorporation** — v2 BEDs + callable-mask validation; BEAST **0.76e-9
+ cross-check clock** (`age::HALLAST_RATE`, not swapped for Helgason); genealogical
+ calibration anchors (`scripts/seed-hallast-anchors.sql`, D1 TMRCA 19,450 ybp,
+ model-dated). P9 palindrome **BLOCKED** on supplementary coords.
+ (`documents/planning/y-preprint-hallast-2026-incorporation.md`.)
+- **Real STR mutation rates** — `scripts/seed-str-mutation-rates.sql` (137 markers:
+ Willems 2016 1000G MUTEA + 95% CI primary, YHRD gap-fill for core markers) replaces
+ the `DEFAULT_STR_RATE` fallback; ω columns stay at the Ballantyne-derived global model.
+
+Prior (2026-06-07): public per-sample report (`/sample/:slug`, mig 0022); static/footer
+pages reconciled with legacy Scala; collaboration-platform design docs (d1–d5);
+design-doc triage (superseded docs removed, rest reconciled).
+Prior (2026-06-05): FTDNA Y-tree SNP-graft + `--reattach`; recurrent-link scrub;
+mtDNA tree wired as an FTDNA RSRS foundation; ETL `--skip-tree` cutover option.
+
+## TL;DR
+
+The **spine is done and then some**: redesigned schema, data layer, public
+HTML/HTMX surface, auth + curator tools, the full production ETL, the public JSON
+API, tree versioning + merge, the SNP-anchored graft + its curator review UIs, the
+YBrowse mirror→reconcile catalog pipeline (≈3M variants), federated **reporting**
+(mirror **and** web endpoints), branch ages, and Y-STR signatures/prediction/age.
+
+The launch-critical path is now just two things: **(1) the data cutover** — the
+ETL has been **verified end-to-end against a real production dump** (2026-06-04,
+363 MB / PG 15): all 34 aggregates reconcile, and the **`--skip-tree` + tree-init**
+cutover flow is verified (prod→`decodingus_cutover`: tree empty, non-tree
+aggregates reconcile, the multi-source tree builds into the empty namespace).
+What's left is *executing* the cutover against live/final data (+ alias-aware mt
+name resolution) — and **(2) the live AT Proto OAuth handshake** (the cross-host
+"Edge joint test").
+The remaining *feature* mass is post-launch: **haplogroup-discovery automation**,
+**multi-test-type completion**, **IBD matching + the social layer**, and
+**sequencer-lab inference** (the AppView coordinates IBD introductions, hosts the
+social surfaces, and resolves instrument→lab for the Edge — only patronage/billing
+is now fully out of scope). See "What's left".
+
+## Layout
+
+- **`/Users/jkane/Development/decodingus/rust`** — this workspace (AppView-only crates).
+ - `du-db`, `du-external`, `du-web`, `du-jobs`, `du-migrate`
+- **`/Users/jkane/Development/decodingus-shared/crates`** — shared crates, separate git repo.
+ - `du-domain` (pure types + algorithms, incl. `merge`), `du-atproto`, `du-bio`
+ - Pushed to `github.com/JamesKane/decodingus-shared`; consumed via **git deps
+ pinned to a rev** in `rust/Cargo.toml` (Docker build unblocked — no sibling
+ path dep needed). To update: push the shared repo, then bump `rev` (or switch
+ to a pushed tag, e.g. `v0.1.0` — created locally, not yet pushed). For local
+ co-dev against working-tree changes, add a `[patch]` back to the sibling paths.
+- Legacy Scala app: `/Users/jkane/Development/decodingus` (parent dir). Navigator:
+ `/Users/jkane/Development/scala/DUNavigator`.
+
+## Local dev / how to run
+
+Postgres runs under Apple `container` (name `du-pg`), reachable at its own IP
+(no localhost forwarding):
+
+```
+DATABASE_URL="postgres://postgres:dev@192.168.64.2:5432/decodingus?sslmode=disable"
+APP_SECRET="<long-random-secret>" # signs session cookies
+```
+
+- Run web: `DATABASE_URL=... APP_SECRET=... PORT=9000 cargo run -p du-web` (binary `decodingus`).
+- Run jobs scheduler: `DATABASE_URL=... cargo run -p du-jobs` (binary `decodingus-jobs`).
+ - **One-shot ops:** `decodingus-jobs run-once <job>` — `ybrowse` (full GFF3 stream +
+ reconcile; needs `YBROWSE_GFF` [+ optional `YBROWSE_CHAIN_GRCH37/HS1`]),
+ `reconcile` (re-derive `core.variant` from the loaded mirror without re-streaming),
+ `yregions` (load the T2T-CHM13 Y reference-region BEDs + refresh region flags), and
+ `branch-age` (recompute STR signatures + the combined branch ages).
+- Tests: `DATABASE_URL=... cargo test -p du-db` (live-DB tests skip/pass if unset).
+ - **Safe against any DB:** every du-db integration test now provisions a private,
+ throwaway database via `du_db::testing::ephemeral_db` (migrated, dropped on Drop),
+ so `cargo test` never touches the catalog `DATABASE_URL` points at.
+ - `du-domain` tests need no DB (`cargo test -p du-domain`).
+- Migrations auto-apply on web/ETL startup; the `du-db` migrations test also applies them.
+- **Gotcha:** if a *committed* migration changes, recreate the dev DB
+ (`decodingus`) — sqlx errors on a checksum mismatch (`VersionMismatch`).
+
+### Databases in use
+- `decodingus` — dev DB (migrations + live tests' base server for ephemeral DBs).
+- `decodingus_legacy` — loaded from `scripts/mock-legacy.sql` (current-schema mock).
+- `decodingus_etl` — ETL target (the migrate binary recreates/migrates it).
+
+## What's done (✅)
+
+- **Schema** — `migrations/0001–0023`. JSONB "document columns" (variant
+ coordinates/aliases/**evidence**, biosample source_attrs/atproto, haplogroup
+ provenance, coverage, …). Highlights since the merge work: `ident.audit_log`
+ (0010), fed reporting (0011–0012), Y-STR (0013–0014), backbone (0015), **variant
+ naming authority** (0016, nullable `canonical_name` + partial unique index +
+ `core.next_du_name()`), **variant evidence** (0017), **YBrowse mirror +
+ reconcile machinery** (0018), **strand-canonical fold** (0019), **INDEL/MNP
+ canon** (0020), **ancestral-state / per-branch ASR** (0021), **`is_public`
+ biosample gate** (0022, the public per-sample report), **variant
+ `defining_haplogroup_id` recurrence model** (0023).
+- **`du-db`** — query modules for every aggregate (variant, haplogroup, biosample,
+ publication, genome_region, coverage, proposal, study, change_set, merge, auth,
+ naming, ybrowse, wip, ystr, age, fed, consent, support) + `testing` (ephemeral DB).
+- **Public HTML/HTMX** (`du-web/routes`) — variants browser, **Y/MT tree as two
+ server-rendered SVG cladograms** (`tree_layout.rs`; breadcrumb re-root,
+ orientation cookie toggle, name/variant search, SNP-detail sidebar with
+ **branch provenance + per-variant locus/anc/der**, backbone/recent coloring +
+ legend, **full-viewport width**, **client-persisted depth selector**
+ [localStorage, `?depth=`]), references + per-pub biosamples, biosample map
+ (PostGIS), coverage benchmarks; i18n (en/es/fr), `HX-Request` fragment
+ negotiation, vendored assets, **site chrome aligned with the Scala app**.
+- **Auth + curator** — signed-cookie sessions, `Curator` RBAC extractor, curator
+ CRUD for haplogroups/variants/genome-regions, curation proposal
+ intake→review→promote, and the review surfaces below.
+- **Variant Naming Authority** (mig 0016, `du_db::naming`, `/curator/naming`) —
+ nullable `canonical_name`, DU-name minting (`core.next_du_name()`), lifecycle
+ (UNNAMED/PENDING_REVIEW/NAMED), same-coordinate dedup; GFF3 propagation at
+ `GET /api/v1/variants/export.gff`. **Gotcha:** the partial unique index means
+ every `ON CONFLICT (canonical_name)` carries `WHERE canonical_name IS NOT NULL`.
+- **YBrowse ingest = mirror + reconcile** (migs 0017–0020, `du-jobs/ybrowse`,
+ `du-db/ybrowse`) — streams `snps_hg38.gff3` (≈3.17M lines) into a verbatim
+ `source.ybrowse_snp` **mirror**, then `reconcile` *derives* `core.variant`
+ idempotently: synonym-fold by strand-canonical key, coordinate-fallback match,
+ INDEL trim-normalize / MNP-typing, rank-based canonical, provisional→DU mint;
+ single matches **enrich existing variants** (coords + mutation_type + evidence,
+ curator choices locked); multi-match clusters → `source.ybrowse_reconcile_flag`
+ → **`/curator/reconcile-flags`** → `variant::merge_into`. First real full run:
+ 2.99M clusters → 2.88M created, **100,968 existing enriched**, 11,406 flagged;
+ catalog now ~3.0M variants, ~3.0M with coordinates. (See memory
+ `ybrowse-ingest-mirror`.)
+- **Variant coordinate enrichment** — reconcile fills coords/types on any
+ name-matching existing variant; a `decodingus-tree-init --backfill-prod-coords`
+ pass fills the b37/hs1 builds the decoding-us API carries that the graft dropped
+ (complement to YBrowse's GRCh38). Sidebar shows `chrY:pos anc>der [build]`.
+- **SNP-anchored graft** (`du-db/snp_graft`, `decodingus-tree-init`) — classifies a
+ source tree (decoding-us prod) against the catalog by defining-SNP anchor
+ (enrich-match / graft-novel / review), Phase-4 curator-review export, and stages
+ flags + name-collisions + graft-blocked items into a DRAFT change-set
+ (`--stage-review`) triaged at **`/curator/reviews`** (SNP-scatter + tree-preview
+ + accept-anchor/reparent/merge/defer; `tree.wip_*` enacted by the apply engine's
+ WIP pass). (See memory `prod-tree-snp-graft`.)
+- **Y reference-region pipeline** (`du-jobs/yregions`, `du-db/genome_region`,
+ `du-db/variant`) — `run-once yregions` loads the T2T-CHM13v2.0 Y structural BEDs
+ (AZF/DYZ heterochromatin, amplicons v2, inverted-repeats/palindromes v2, chrXY
+ sequence-class) into `core.genome_region` (`upsert_by_key` + `prune_source_orphans`
+ = full-snapshot sync). `refresh_region_overlaps` stamps
+ `core.variant.annotations.region_overlaps` for variants in unreliable-for-placement regions; `snp_graft`
+ routes anchors whose every supporting SNP is unreliable to curator review
+ (`UnreliableAnchor`). Empirically validated by Hallast 2026 (Fig 5h-i callable
+ mask). hs1 coords only (1-based inclusive). (Memory `yregions-ingest`.)
+- **Tree sample leaves (YFull-style)** (`du-db/tree_sample`, mig 0037
+ `tree.haplogroup_sample`) — places **non-D2C** biosamples (`source <> 'CITIZEN'`) as
+ leaves under the tree node their **published call** (`core.biosample.original_haplogroups`)
+ resolves to, reusing `haplogroup::resolve_name_or_variant` (name→alias→defining-SNP→
+ normalize). `recompute_placements(dna)` is an advisory-locked declarative engine
+ (assign+prune, bumps `tree_revision`); unresolvable calls kept `UNPLACED` for curator
+ triage. Serving: `/api/v1/y-tree` nodes carry a **cumulative `sample_count`**;
+ `/api/v1/{y,mt}-tree/node/{name}/samples` lists the leaves (accession/alias/source +
+ paper citation). `du-jobs run-once tree-samples-recompute` + daily. Y now, mt-ready
+ (dna_type-parameterized; no mt recompute until the mt tree lands). **HTML cladogram
+ done (2026-06-14):** placed samples render as **YFull-style leaf tips** hanging off their
+ node (`tree_layout::LaidTip` via `tree_sample::direct_labels`) — each tip reserves a full
+ node slot (spaced like any leaf), the node centers over children+tips, and tip connectors
+ share the node's child bus; capped 8/node + a "+N" overflow tip → sidebar. (The JSON API
+ keeps a `sample_count` per node.) The SNP sidebar lists the placed leaves (label + source +
+ citation, capped 50 + "+N more").
+ **Curator triage (2026-06-13):** `status='CURATED'` (manual placement the recompute
+ preserves) + Curator-gated `GET /manage/tree-sample/unplaced` (the unresolved-call queue) +
+ `POST /manage/tree-sample/place` (pin a sample under a chosen node). (Memory
+ `tree-sample-leaves`.)
+- **ETL** (`du-migrate`) — **full production surface**: catalog (donors, biosamples,
+ variants, tree, studies, publications), ident/auth, genomics. Validated vs the
+ schema-only `db.schema` and the current-schema mock with data; all aggregates
+ reconcile.
+- **Public JSON API** (`du-web/api.rs`) — read endpoints under `/api/v1/*` +
+ OpenAPI 3 + Swagger UI at `/api` (utoipa). Includes the federated population
+ reports `/api/v1/reports/{coverage,ancestry,haplogroups}` aggregated from the
+ `fed.*` mirror with query-time SQL, plus `haplogroups/:name/{str-signature,age}`
+ and `POST /api/v1/str/predict`. **Tree cache revalidation (2026-06-12):** the
+ `{y,mt}-tree[/full]` endpoints emit a strong `ETag` + `Last-Modified` from a
+ persisted `tree.tree_revision` marker (mig 0024) and honor `If-None-Match` → 304
+ *before* the ~28 MB query; the marker is bumped by every tree-mutating op
+ (change-set apply, coordinate/alias enrichment, reconcile, tree-init). Added
+ `/api/v1/{y,mt}-tree/version`. Memory `tree-cache-revalidation`.
+- **Tree versioning** (`du-db/change_set.rs`, `du-web/routes/versioning.rs` +
+ `change_sets.rs`) — change-set lifecycle + per-change review + diff + temporal
+ apply engine; curator-gated machine API at `/manage/change-sets/*` **plus a
+ two-panel HTMX review UI** at `/curator/change-sets`. Integration-tested.
+- **Tree merge** (`du-domain/merge.rs` + `du-db/merge.rs`) — pure
+ Identify-Match-Graft; `materialize` → change-set via placeholder-chained `tree_change`; endpoints
+ `/manage/haplogroups/merge[/preview]`. Fixtures + e2e tests pass.
+- **Federated reporting** (`du-db/src/fed/`, `du-jobs/jetstream.rs`, migs 0011–0012)
+ — the AppView **aggregates and reports; it does not analyze.** A long-lived
+ Jetstream consumer mirrors Navigator's published anonymized computed-summary
+ records (the `✅ AppView Complete` set) into `fed.*` tables, cursor-resumed,
+ idempotent+ordered. **Privacy:** typed anonymized columns only, no raw JSONB for
+ PII-bearing records. Flow (a) proposal intake + (b) reporting ingest + (c)
+ reporting web endpoints are **all DONE**. (Memory `atproto-federation-direction`.)
+- **Y-STR per-branch signatures + prediction + age** — `fed.str_profile` mirror
+ (Jetstream) + `du_db::ystr` modal-haplotype aggregation →
+ `tree.haplogroup_ancestral_str` (mig 0013) via `str-signature-recompute`; STR→branch `predict`
+ at `POST /api/v1/str/predict`. STR age is the **McDonald multi-step PDF model**:
+ `P(g|m)` from Table 1 (embedded) + ω convolution fallback → per-marker Poisson
+ mixture (`du_db::pdf::Pdf::mixture`) → **tree-propagated** TMRCA PDFs
+ (`ystr::propagate_str`, ancestral-motif reconstruction). Per-marker rates from
+ `genomics.str_mutation_rate` (seeded, 137 markers; Willems 2016 + YHRD).
+- **Combined branch age (McDonald 2021)** (`du-db/age.rs`, migs 0013/0014) — each
+ evidence term is a **PDF**: SNP TMRCA (bottom-up tree propagation, Eq 5–8, on the
+ `du_db::pdf` grid), STR TMRCA (`ystr::str_tmrca_pdfs`), and genealogical/aDNA-anchor
+ Gaussians; `COMBINED` is their **direct product** (Eq 1, shape-preserving; disjoint
+ → inverse-variance fallback), gap-filling `tree.haplogroup.{formed,tmrca}_ybp`
+ (curated values never overwritten). `HET_MASK` excises heterochromatic SNPs;
+ Helgason rate default with Hallast `HALLAST_RATE` as a recorded cross-check. Runs in
+ `branch-age-recompute` (= `run-once branch-age`). SNP/STR/anchor terms data-gated
+ (sparse pre-cutover; the dev tree is tree-only, so a live run is a near no-op).
+- **`du-jobs`** — tokio scheduler + **`run-once`** one-shot mode; jobs:
+ `db-heartbeat`, `ybrowse-variant-ingest`, `publication-update`,
+ `publication-discovery`, `publication-pubmed-update`, `ena-study-enrichment`,
+ `str-signature-recompute`, `branch-age-recompute`; plus the Jetstream
+ reporting-mirror consumer (set `JETSTREAM_URL`).
+- **`du-external`** — OpenAlex, ENA, NCBI/PubMed; AWS SES + Secrets Manager behind
+ the `aws` feature.
+- **`du-atproto`** — DID/handle resolution, Ed25519 verify, PKCE/DPoP/private-key-JWT
+ OAuth client + metadata builders (library; HTTP surface = the Edge test below).
+- **Public per-sample report** (`/sample/:slug`, `du-web/routes/samples.rs` +
+ `templates/samples/`) — ExploreYourDNA-style page gated by `core.biosample.is_public`
+ (mig 0022). `du_db::biosample::report` is the **unified read model**: anchors on the
+ canonical `core.biosample` (+ donor sex/origin, publications) and attaches the
+ federated analytics (`fed.biosample`/`fed.sequencerun`/`fed.coverage_summary`/
+ `fed.population_breakdown`) via `atproto.uri ↔ *.biosample_ref` — the seam the
+ eventual core/fed **biosample consolidation** collapses into (memory
+ `biosample-consolidation`). Sections: identity, Y+mt **haplogroup pathways**
+ (`du_db::haplogroup::pathway` — root→tip clades + ages + defining SNPs; graceful
+ "not placed" gap), origin Leaflet map, sequencing/coverage, ancestry stacked bar.
+ Curator `is_public` toggle (`/curator/samples/:slug/public`); JSON API
+ `GET /api/v1/samples/:slug`. Tested (`du-db/tests/sample_report.rs`). **Follow-up:**
+ the report shows one `populationBreakdown`; Navigator now publishes two methods —
+ pick PCA-GMM (memory `ancestry-method-pick-followup`).
+- **Secondary web surfaces** — static pages (about/contact/**reputation**/terms/
+ privacy/**cookies**/FAQ; content reconciled with the legacy Scala prose —
+ **App Passwords removed**), footer nav matching the legacy set, `sitemap.xml`/
+ `robots.txt`, GDPR cookie-consent banner, read-only **profile** page,
+ reCAPTCHA-verified **contact** form. Root README rewritten for the Rust AppView.
+- **Testing** — du-domain unit tests (no DB); du-db integration tests isolated to
+ ephemeral databases (`du_db::testing::ephemeral_db`); du-web i18n parity test
+ enforces es/fr cover every English key.
+
+## What's left, in scope (⬜)
+
+Launch-critical first, then the post-launch feature mass.
+
+> **Design landscape (2026-06-07).** The post-launch collaboration/IBD layer now has
+> drafted build specs: `documents/planning/d1`–`d5` + `design-roadmap-rust-rewrite.md`
+> — **D1** encrypted Edge-to-Edge exchange + AppView broker (the shared substrate),
+> **D2** PII-free ResearchSubject registry, **D3** IBD impl on D1, **D4** assertion
+> store (split PII rails), **D5** group-project ACL. Central invariant: **AppView
+> holds no PII — it brokers** (memory `collab-platform-d1-d5`). Two tracks join at
+> D1: Platform D1→D2→D4→D5, Match D1→D3; the Catalog track (D6 discovery, D7
+> multi-test, D8 sequencer-lab) is independent. The original planning docs were
+> triaged and reconciled/removed — see `documents/{planning,proposals}/*triage*.md`.
+
+1. **Cutover** (see "Cutover strategy") — ETL verified end-to-end. Chosen strategy:
+ freeze prod read-only → fresh dump → prepare locally (ETL data + ISOGG-founded
+ tree build) → `pg_dump` → restore on AWS → flip. **`--skip-tree` DONE** (commit
+ 0f83dbc): `decodingus-migrate --skip-tree` skips the 3 tree transforms +
+ reconcile checks (the tree is built by `tree-init` into the empty namespace);
+ biosamples carry haplogroup names as JSON and resolve at read time; `core.variant`
+ still migrates (tree-init reuses by `canonical_name`). Cutover order: migrate
+ `--skip-tree` → tree-init.
+ **FTDNA descoped (2026-06-12):** beta tree = **ISOGG foundation + decoding-us
+ graft, no `--reattach`**; **no mt tree at beta**. The FTDNA-heavy subsections
+ below (mt foundation, 81k hybrid, reattach) are superseded — keep for later.
+ So name resolution is **Y-only**.
+ **Name resolution — DONE (2026-06-12).** Diagnosed against the real prod dump:
+ `public.biosample_haplogroup` (the reconciled FK) is **empty in prod**, so
+ `original_haplogroups` carries the raw heterogeneous **publication** call text
+ (FTDNA shorthand `R-M269`, path strings `R-DF27 > Z195 > Z198`, bare SNPs, old
+ YCC longhand `R1b1a2a1a2c1g`, `n/a`). Only ~20% match a node name directly.
+ `du_db::haplogroup::resolve_name_or_variant` now has a **normalization fallback**
+ (`normalize_haplogroup_call`: strip FTDNA prefix, terminal path token, split SNP
+ synonyms) that resolves the SNP-bearing calls via the existing defining-variant
+ phase → ~70% of rows (improves the per-sample report AND tree search). Residual:
+ ~59 YCC-longhand names need an authoritative old-YCC→modern crosswalk (ISOGG file
+ has only 13 name-aliases — don't hand-guess). Memory `biosample-y-name-resolution`.
+ **Per-variant upsert perf — DONE (2026-06-12).** The "1s slow-statement" was the
+ no-op `DO UPDATE SET canonical_name = EXCLUDED.…` rewriting every *pre-existing*
+ variant row (the catalog is pre-loaded by YBrowse, so the graft/merge/apply calls
+ nearly all conflict) → MVCC bloat + index churn (~1.9s in bulk, +893 heap pages /
+ 30k rows). The index is a correct arbiter — not the issue. Fixed:
+ `du_db::variant::ensure_base_variant_id` (`DO NOTHING` + read-back, zero writes on
+ conflict); all 3 `get_or_create_variant` route to it. Memory
+ `variant-upsert-noop-write`.
+ **YCC→SNP node rename — DONE (2026-06-12).** `tree-init --rename-snp-shorthand`
+ (`du_db::haplogroup::rename_to_snp_shorthand`) drops YCC-longhand node names
+ (`R1b1a2`) to `<MAJOR>-<SNP>` (`R-M269`), single major letter
+ (renormalizes decoding-us `E1b-`→`E-`), keeping the **old YCC name in
+ `provenance.aliases`** — which also **closes the YCC resolution residual** (the
+ resolver's alias phase now resolves old biosample YCC calls). Naming SNP: existing
+ shorthand → ISOGG-designated first variant (`--isogg`) → DB-linked variant
+ (SNP-shaped only). Macro/backbone nodes, coordinate-name variants, and name
+ collisions are skipped + flagged (no guessing). Dev-tree dry-run: 10,254/10,516
+ renamed; ~185 keep YCC (twin collisions + no-SNP). Run it as a post-graft step in
+ the cutover tree build. Memory `ycc-to-snp-rename`.
+2. **Live AT Protocol OAuth handshake — the cross-host "Edge joint test."** Library
+ + a dev public-client path are verified locally up to the **consent click**
+ (gated `decodingus-shared/.../tests/live_pds.rs`: discovery + PAR + DPoP +
+ `use_dpop_nonce` → `request_uri`, then with a Caddy TLS proxy up to the authorize
+ page). The confidential web-client `private_key_jwt`-PAR round-trip can't run
+ under Apple `container` (no `--add-host` for the PDS to resolve our `client_id`
+ host) → it's the Edge joint test. Token path wired; remainder is the browser
+ consent + cross-host verify. Runbook: `docs/atproto-oauth-findings.md`,
+ `docs/atproto-edge-reply.md`.
+3. **Haplogroup-discovery AUTOMATION — DONE (2026-06-12).** Both halves now built.
+ Citizens publish a **`privateVariant` lexicon** record (their variants beyond the
+ terminal); the Jetstream consumer mirrors it into `fed.private_variant` (mig 0028).
+ The **discovery consensus engine** (`du_db::discovery`, mig 0029) materializes them
+ into `tree.biosample_private_variant`, then pools per-sample variant sets into
+ `tree.proposed_branch` by **variant-set Jaccard** — deterministic, declarative
+ recompute (idempotent, stable-id UPSERT via a `cluster_key` partial index), config
+ thresholds from `tree.discovery_config`, real confidence (count + distinct
+ submitters + variant-set consistency), `READY_FOR_REVIEW`/`SPLIT_CANDIDATE`
+ transitions, opt-in auto-promote (off by default). On **promotion** the
+ contributing samples' private variants are marked `PROMOTED` + reassigned to the
+ new terminal (`discovery::reassign_after_promote`, in `proposal::promote`'s tx) —
+ which also freezes them out of the recompute loop. Read API
+ `GET /api/v1/discovery/proposals[/:id]`; the `/curator/proposals` UI now shows
+ defining variants + confidence + a split banner. Job `du-jobs run-once
+ discovery-consensus` (+ hourly). Mirrors the sequencer engine's structure. Memory
+ `discovery-consensus-engine`. **Remaining (future):** split *execution* (flagging
+ only), a deepest-defined-branch read-path, geographic/temporal confidence signals.
+4. **Multi-test-type — DONE (2026-06-12).** The AppView's whole multi-test-type
+ concern is **call reliability** for the shared genealogy components, with two
+ inputs (per-test tracking/parsing is Navigator's, not the AppView's). **(a) Coverage
+ conformance** (below). **(b) Cross-technology consensus** — `fed.haplogroup_reconciliation`
+ (the donor's call reconciled across all its technologies: consensus_haplogroup +
+ confidence + snp_concordance + run_count) is now the **authoritative call**. Bridge
+ = `reconciliation.did = core.biosample.atproto->>'repo_did'` + dna (citizen
+ self-publish; no schema change). The per-sample **report** resolves
+ Reconciled→FedConsensus→Original and shows the consensus + N runs + confidence +
+ concordance (`biosample.rs`, `_pathway.html`, `HaplogroupCallOrigin::Reconciled`).
+ **Tree evolution gates+weights on it** (mig 0031): the discovery engine **excludes**
+ contributors below `min_consensus_confidence` (0.5) or `INCOMPATIBLE` (un-reconciled
+ kept), and **down-weights** proposal confidence by the cluster's mean consensus
+ reliability (`w_reliability` term) — so the consensus drives the tree, never
+ individual runs. Memory `discovery-consensus-engine`.
+
+ **(a) Coverage norms & conformance.** Reframed (per the user) from the Scala doc's
+ haplogroup-marker/accuracy-tier/IBD machinery to grounded coverage QA: **callable
+ loci + depths per test type vs the norm**.
+ `genomics.test_type_coverage_norm` (mig 0030) holds the **empirically-derived**
+ cohort norm per test type (median/p25/p75 depth, median pct tiers, typical Y/mt
+ marker counts), recomputed from `fed.coverage_summary ⋈ fed.sequencerun` (+
+ `fed.genotype` markers) by `du_db::coverage::recompute_norms` (advisory-locked,
+ declarative; `du-jobs run-once coverage-norms` + hourly). The **per-sample report**
+ now shows actual depth vs the cohort norm (+ advertised spec) with a BELOW/AT/ABOVE
+ badge — `conformance()` baselines on the **empirical cohort norm**, not the
+ advertised aligned bar (a "30× WGS" is a ~90 Gb raw-yield spec; D2C labs don't
+ target 30× aligned, so an advertised number would mislabel them). Vendor tracking:
+ `coverage::benchmarks` DTO gains `meets_spec`/`depth_delta` (lab × test type).
+ Read API `GET /api/v1/test-types[/:code]` (taxonomy + norm). Memory
+ `test-type-coverage-norms`. **Deferred:** age-contribution wiring (typical SNP
+ counts captured, not yet fed into `age.rs` — Eq-4 callable-interval); raw-yield
+ (Gbases) norm; cataloged-coverage union. **Out:** haplogroup marker-coverage /
+ accuracy-tier, cross-test-type IBD (D1/D3). No `test_type_definition` seed (read
+ opportunistically; key off the federated test-type string).
+ (`documents/planning/multi-test-type-roadmap.md`.)
+5. **IBD matching — AppView as coordinator (NOT dropped).** The AppView is the
+ only component with the cross-federation view to identify **introduction
+ candidates**, so it must: mine `fed.*` for candidate pairs (shared haplogroup,
+ population overlap, shared-match signals), run the **dual-consent** handshake,
+ coordinate the Edge hand-off, and **persist match state** (attestations /
+ overlap scores / suggestions) for ongoing match lists + dedup. It stores **no
+ raw autosomal data** and does **no** segment comparison — that's Edge-to-Edge.
+ Schema `ibd` (mig 0007). The **candidate-generation engine is DONE (2026-06-12)** —
+ the D1-independent first slice: `du_db::ibd::recompute_suggestions` mines
+ introduction candidates from `fed.*` (population overlap **within ancestry blocks** =
+ dominant super-pop × z-scored PCA cell; shared terminal Y/mt consensus haplogroup;
+ 2-hop shared-match over `ibd_discovery_index`), combines + ranks + **caps top-K per
+ sample** (the no-N:N guarantee), declaratively writing `ibd.match_suggestion`
+ (preserves DISMISSED/CONVERTED). `du-jobs run-once ibd-discovery-recompute` + daily;
+ `suggestions_for` reader. Engine-only — **no public API** (candidate pairs gate on
+ the D1 consent flow). **Federated read API DONE (2026-06-12)** — the entry point of
+ the whole flow, and it needed **no new auth foundation**: the existing Ed25519
+ signed-poll pattern (`verify_signed` + `messages::poll` + 300s window) + the
+ `core.biosample.atproto->>'repo_did'` bridge the engine already uses dissolved the
+ apparent DPoP blocker. `du_db::ibd`: `suggestions_for_did` (owner-DID-scoped via the
+ bridge), `is_suggested_to_did` (introduce authz), `owner_did_of_sample` (server-side
+ counterpart resolution), `messages::{poll,introduce}`. Endpoints (`routes/ibd.rs`,
+ signed, **personal scope** — not project-scoped): `GET /api/v1/ibd/suggestions` (own
+ **pseudonymous** candidates — only `suggested_sample_guid` + non-PII `{signals}`
+ scores), `POST /api/v1/ibd/introduce` (broker-mediated: resolves the counterpart DID
+ server-side, calls `exchange::create_request`, **never returns the DID** — caller learns
+ it only post-mutual-consent via `exchange::pending_for`). **Lifecycle round-off
+ (2026-06-13):** purpose is now **routed per signal** (HAPLOGROUP→IBD_Y/IBD_MT via the
+ engine's recorded `hgDnaType`, else IBD_AUTOSOMAL — `introduction_purpose`); introduce
+ marks the suggestion **CONVERTED**; new `POST /api/v1/ibd/dismiss` → DISMISSED (engine
+ preserves it). Memory `ibd-candidate-generation`. **Remaining (needs D1/Navigator):** the
+ daily recompute scheduler exists (confirm cadence); attestation-ingest +
+ `depth_score` from the tree; PCA-LSH tuning; Navigator consume-UI +
+ introduce→consent→relay round-trip. Authoritative design:
+ `documents/planning/d3-ibd-matching-impl.md`
+ §3 (on `d1-encrypted-edge-exchange.md`).
+ **D1 exchange BROKER DONE (2026-06-12)** — the shared substrate gating the Match +
+ Platform tracks. `exchange.*` schema (mig 0032; the unused `ibd.match_*` folded +
+ dropped) + `du_db::exchange` (publish/fetch X25519 key, request, **dual-consent
+ gate** → session, **`incoming`** [PENDING requests awaiting a recipient — closes the
+ introduce→consent loop, **symmetric-blind**: no initiator DID], pending, blind relay
+ post/pull/ack, TTL `expire`) + `du-web` `/api/v1/exchange/*` endpoints, all
+ **Ed25519-signature-authenticated** (`crate::sig::verify_signed` — **no OAuth/cookie**
+ per call, so D1 doesn't wait on the OAuth joint test) + `du-jobs exchange-expire`.
+ PII-free broker — never sees plaintext/keys, relays opaque ciphertext. Memory
+ `exchange-broker`.
+ **DEVICE-KEY AUTH FOUNDATION DONE (2026-06-13)** — fixes the gap that the DID-doc
+ `#atproto` signing key is PDS-custodied (a desktop client can't sign with it, can't add
+ its own verificationMethod), so only `did:key` could authenticate. Now a client
+ publishes its Ed25519 device PUBLIC key as a `com.decodingus.atmosphere.deviceKey` record
+ in its own repo (repo-write = proof of control over repo_did); the AppView ingests it
+ (`fed.device_key`, mig 0036) like any `fed.*` record. **`verify_signed(pool, did, msg,
+ sig)`** now: `did:key` self-certifies; **`did:plc/web` ⇒ match any registered device key**
+ (`du_db::fed::device_key::keys_for`; none ⇒ 403, the bootstrap), DID-doc resolution
+ dropped (no per-call network). N keys per DID; **revoke = delete the record** (routes
+ through `fed::delete`). All 18 signed call sites thread `&st.pool`. PII-free (DID + public
+ key only). Memory `device-key-auth`. Navigator: generate+keychain a device key → one-time
+ OAuth `createRecord` → sign all Edge calls with it. **Remaining (not AppView):** the
+ `du-exchange` crypto crate (X25519/AEAD/X3DH-lite, `decodingus-shared`) + the Navigator Edge
+ relay client/session driver (DUNavigator) for the end-to-end round-trip.
+6. **Collaboration + social layer.** The genealogy-collaboration platform (group
+ projects, ResearchSubject registry, assertions) is specced in **D2/D4/D5** on the
+ D1 channel. **D2 ResearchSubject registry DONE (2026-06-12)** — `research.*` schema
+ (mig 0033: PII-free pseudonymous person nodes + `social.group_project` memberships +
+ tombstone merge audit + sparse biosample link) + `du_db::research` + `du-web`
+ `/api/v1/research/*` endpoints, **signature-authenticated** (`crate::sig`, shared
+ with D1) **and authorized** from existing data (register → project owner; merge →
+ steward of both; custody → subject steward; read → project participant). PII-free
+ invariant holds. Memory `research-subject-registry`. **D5 group-project ACL DONE
+ (2026-06-12)** — `research.project_member` (mig 0034, reusing `social.group_project`
+ as the project, `owner_did`=founding ADMIN) + `du_db::research` Role/Capability ACL
+ (`role_of`/`is_team_member`/`can`/`add_member`/`revoke_member`/`members_of`), **wired
+ in**: D2 register is `ManageSubjects`-gated + subjects read team-gated; D1
+ project-scoped request/consent require live team membership; team endpoints
+ `/api/v1/research/project/{member,member/revoke,members}` (signed, ADMIN-gated).
+ Memory `group-project-acl`. **D4 assertion store (R2) DONE (2026-06-12)** —
+ `research.assertion` + `research.subject_current_view` (mig 0035), the attributed,
+ append-only, scoped claim primitive. `du_db::research`: `Predicate` enum + PII
+ classifier (`MDKA_IS`/`IDENTITY` have **no** AppView table — R3 P2P only; `NOTE` is
+ PII-by-default, storable only when `pii_cleared`; a `scan_pii` value scrubber rejects
+ emails/overlong text regardless) + `record_assertion`/`retract_assertion`/`refold`
+ (per-(subject,predicate,scope) fold → SETTLED|DISPUTED, **per-project isolated** so a
+ subject in two projects never bleeds claims) + `accept_same_person` (drives the D2
+ `merge_subjects(method=ASSERTION)`, **never** auto-collapsed). Endpoints
+ `/api/v1/research/{assertion,assertion/retract,assertion/resolve,current-view}` —
+ signed + role-gated (`WriteAssertions`=ADMIN/CO_ADMIN; `ResolveDispute`=ADMIN/CURATOR).
+ Memory `assertion-store`. **Deferred (Navigator/R1/later):** R3 PII over D1 +
+ `assertion_local`; R1 `com.decodingus.research.assertion` lexicon + du-jobs Jetstream
+ ingest (no publisher yet — `record_uri`/PUBLIC scope already in place); catalog
+ promotion via `tree.change_set`; shared `du-domain` assertion types. The broader
+ social surfaces (messaging/feed/reputation/blocks) are the
+ reconciled forward proposals (`documents/proposals/{group-project-system,
+ Messaging_and_Feed_System,Reputation_System_Implementation}.md`). Schema `social`
+ (mig 0009) exists; logic + endpoints to build. **No-PII caveat:** DMs must ride D1
+ (or AT-Proto), not a central plaintext `social.message`.
+7. **Sequencer-lab inference — AppView lookup + consensus (NOT dropped).** The
+ **lookup API is DONE (2026-06-12)**: `GET /api/v1/sequencer/lab?instrument_id=…`
+ (→ `SequencerLabDto`, 404 if unknown) + `GET /api/v1/sequencer/lab-instruments`
+ (bulk cache seed), resolving via the **preseeded** `genomics.sequencer_instrument.
+ lab_id` (mig 0025 re-adds it; ETL backfills from the legacy tie;
+ `du_db::sequencer`). **Seeded (2026-06-13, mig 0038):** the old YDNA-Warehouse d2c
+ instrument→lab map — **5 labs + 36 instruments** (rows with `n_crams > 2`, max-frequency
+ lab; canonical full names FTDNA→Family Tree DNA / Dante Labs / Nebula Genomics / Full
+ Genomes Corporation / YSEQ, all `is_d2c`; `model_name`=export platform, `manufacturer`
+ derived). Idempotent (`ON CONFLICT (name) DO NOTHING` / `(instrument_id) DO UPDATE`);
+ the dev DB had 0 labs (legacy `public.sequencing_lab` is empty — hence the need). Source
+ `instrument_centers.tsv` (repo root, reference only). `lab_instruments.tsv` is just a
+ different view of the same data — already captured, no separate work. The
+ proposal/consensus path is **not live anywhere**, so the
+ lookup uses the direct tie (memory `sequencer-lab-lookup`). The **consensus
+ engine is DONE (2026-06-12)**: `du_db::sequencer::recompute_consensus` derives
+ observations from `fed.sequencerun ⋈ fed.biosample.center_name`, aggregates per
+ instrument into `instrument_association_proposal` (dominant lab, distinct-citizen
+ counts, confidence, threshold status, conflict→PENDING), run by `du-jobs run-once
+ sequencer-consensus` (+ hourly). Curator API `/manage/instrument-proposals[/:id[/
+ accept|/reject]]` — **accept sets `sequencer_instrument.lab_id`** (closing the loop
+ to the lookup), audited in-transaction via `du_db::audit::log`. Hardened for
+ production (mig 0026): is_d2c no longer clobbered, audit joins the mutation tx,
+ stable proposal ids (UPSERT not DELETE+re-INSERT), `pg_try_advisory_lock` guard,
+ aggregation/queue indexes. The **curator HTMX review UI is DONE (2026-06-12)** —
+ two-panel queue at `/curator/instrument-proposals` (status-filter chips, proposal
+ detail with supporting observations, accept form [lab/manufacturer/model/d2c] +
+ reject-with-reason), Curator-gated, i18n en/es/fr, on the dashboard. The
+ **`instrumentObservation` lexicon is DONE (2026-06-12)**: citizens publish
+ `com.decodingus.atmosphere.instrumentObservation` (real confidence KNOWN/INFERRED/
+ GUESSED + `observedAt`); the Jetstream consumer mirrors them into
+ `fed.instrument_observation` (mig 0027, `du_db::fed::instrument_observation`), and
+ `recompute_consensus` folds them in next to the implicit `center_name` claims with
+ **real confidence-level + recency scoring** (the score's recency/level terms were
+ constants, now computed in SQL). **D8 is complete** — remaining ideas are the
+ "Future Considerations" list (flowcell-level tracking, geographic inference,
+ publication cross-ref, reputation weighting). (`documents/planning/sequencer-lab-inference-system.md`.)
+8. **Smaller in-scope finishers:**
+ - **Graft carries coordinates forward** at creation (fold into
+ `get_or_create_variant`) so the decoding-us backfill isn't needed after each
+ re-graft.
+ - **YBrowse reconcile tail:** off-by-one / near-coordinate proximity detection;
+ an external synonym authority (YFull/ISOGG cross-refs) to assert "X = Y" across
+ genuinely different coordinates; per-name evidence consolidation.
+ - **WIP/merge review:** `EDIT_VARIANTS` resolution + cascading a graft-blocked
+ *subtree* from a single decision.
+ - **Branch age:** the McDonald model is built end-to-end (PDF engine, SNP + STR
+ tree propagation, multi-step `P(g|m)`, genealogical anchors, PDF-product combine,
+ seeded STR rates). Remaining refinements are **data-shaped, not architectural**:
+ the true b̄ coverage *intersection* (Eq 4 — needs per-sample callable intervals),
+ the Eq 9/10 causality back-correction, the PDF-at-scale perf check once a
+ densely-sampled subtree exists, and the lone missing single-copy STR rate (DYS447).
+ - **API:** surface unnamed variants (cross-repo change — `du-domain::Variant.
+ canonical_name` `String` → `Option`, shared with Navigator).
+ - More `fed.*` report shapes (genotype-provider mix, platform/test-type
+ distribution) as the UI needs them.
+9. **Tech debt** — JSONB consolidation is **done** (realized in the de-sprawl,
+ mig 0002/0004 — that analysis doc was removed); terms/privacy prose now mirrors
+ the legacy Scala content but is still "subject to legal review"; optional
+ internal/curator OpenAPI document; harden `du_db::variant::get_by_id` for a NULL
+ `canonical_name` (unnamed-variant edge).
+
+## Out of scope / deliberately absent (➖) — do NOT build
+
+- **Manual sample-ingestion APIs** (biosample create + sequences + publication-link)
+ — curators use Navigator now; the AppView keeps catalog **review + naming** only.
+- **BAM/CRAM extraction + variant calling** — done at the edge (Navigator); the
+ AppView aggregates summaries/proposals (so `du-bio` is text + coordinate math, no
+ htslib/noodles).
+- **The legacy PDS fleet / raw-data network mirror** — `fed.pds_node` /
+ `pds_heartbeat` / fleet-admin tables (mig 0008) map to the **dropped** mirror
+ design; don't build registration/heartbeat/fleet endpoints.
+- **AppView→PDS backfeed** — the AppView writes nothing back to PDSes (inbound-only
+ / notify-fetch direction).
+- **Patronage / billing** — not in production (`billing` placeholder; no logic).
+ **Deferred, not dead:** revive to fund infrastructure past ~a few hundred active
+ users (`documents/proposals/Patronage_Donation_System.md`; FAQ already names it).
+- (IBD matching, the social layer, and sequencer-lab inference are **back in
+ scope** — see "What's left" items 5–7. Their schemas remain placeholders pending
+ that build.)
+
+## Cutover blocker — VERIFIED (2026-06-04)
+
+The ETL has been run end-to-end against a **real production dump**
+(`/Users/jkane/backup_file.sql`, 363 MB, PG 15.18) and **all 34 aggregates
+reconcile**. Schema risk was already retired (`~/db.schema` is current prod);
+this run retired the data risk too.
+
+How it was run (repeatable):
+1. `CREATE ROLE decoding_us_user;` (the dump owns objects as this role), then a
+ fresh `decodingus_prod` DB.
+2. Load the dump, stripping the two `\restrict`/`\unrestrict` lines — the
+ container psql is **16.4**, which predates those meta-commands (added in the
+ August 2025 security releases, e.g. 16.10 / 15.14): `grep -vE '^\\(un)?restrict' dump.sql |
+ container exec -i du-pg psql -U postgres -d decodingus_prod -q`.
+3. `decodingus-migrate --legacy <legacy-db> --target <target-db>`
+ (recreate the target first; the run migrates + transforms + reconciles).
+
+**The variant fold (commits fbc298a → cd37657):** legacy `public.variant` is one
+row per (SNP, build, mutation DIRECTION); `core.variant` is one row per physical
+SNP **site**. The transform folds by site (`dense_rank` over position) and carries
+per-branch ancestral/derived onto `tree.haplogroup_variant` (ASR model — see
+[[etl-cutover-verified]] / migration 0021). Real-data: variant 3,023,051 →
+2,899,782; haplogroup_variant → 86,744; all aggregates reconcile.
+
+## Cutover strategy (chosen 2026-06) — read-only freeze, prepare local, ship to AWS
+
+1. **Freeze prod** read-only (no write drift during migration).
+2. **Take a fresh dump**; load locally → `decodingus_prod` (role `decoding_us_user`
+ + strip `\restrict` lines for psql 16; see above).
+3. **Prepare locally** (the new-schema DB):
+ - ETL the **non-tree** data (donors, biosamples, pubs, variants, genomics) —
+ all reconcile today.
+ - Build the **tree separately, ISOGG-founded** (the chosen direction — see
+ "Tree build direction" below): `tree-init --isogg --apply` then
+ `--merge-prod --snp-graft --graft --apply`.
+ - (Optional) run the YBrowse ingest for full coordinate coverage.
+4. **Ship to AWS:** `pg_dump -Fc` the prepared DB, restore on AWS, point the new
+ codebase at it, flip.
+
+**The one ETL change this needs:** the ETL currently *migrates the prod
+decoding-us tree*; for the ISOGG-founded build it must **skip the tree transforms**
+(`haplogroup` / `haplogroup_relationship` / `haplogroup_variant`) and leave the
+tree to `tree-init`. Add a `--skip-tree` flag (or split tree transforms out). NOT
+yet built.
+
+**Two integration points to settle:**
+- **Name resolution must be alias-aware.** `biosample→haplogroup` is by **name**,
+ not FK (`core.biosample.original_haplogroups` JSONB, `fed.biosample.y/mt_haplogroup`
+ text) — so an ISOGG-founded tree works *because* decoding-us names live as
+ aliases on the ISOGG nodes. Verify tree-search + biosample views resolve via
+ aliases once a flip DB has data.
+- **Postgres version.** Prod dump is PG 15; the local container is 16. `pg_dump`
+ 16 → restore into 15 can break — run the new code's AWS instance on **PG 16**
+ (match local) or pin local to 15.
+
+### Tree build direction — ISOGG foundation + SNP-graft everything (decided)
+
+Sources differ in naming (ISOGG path-strings vs decoding-us/FTDNA SNP-names) AND
+root depth, so the exact-set name merge (`du_db::haplogroup::merge_into` /
+`du_domain::merge`) is useless cross-source — its subtree-scoping cascades a
+root-topology mismatch to NEW (matched=1, would duplicate 10,230 nodes). Use the
+**SNP-anchored graft** (`du_db::snp_graft`, `tree-init … --snp-graft`). Full
+investigation + recipe in memory [[tree-source-merge]]. Decisions:
+
+- **ISOGG is the foundation** (single `Y` root + curated backbone authority), then
+ graft decoding-us and FTDNA onto it. The reverse (FTDNA- or decoding-us-founded)
+ drops the deep-root region, becomes a rooted forest, and/or inverts naming
+ authority. Build:
+ `tree-init --isogg --apply` →
+ `--merge-prod --snp-graft --graft --apply` →
+ `--ftdna --graft --reattach --apply`.
+- **`--reattach` is required for FTDNA** (105k-node complete-topology source,
+ `/Volumes/nas/FTDNA/`, refreshed weekly). FTDNA merges SNP blocks ISOGG splits,
+ so a bush's backbone ancestor is often weak-flagged and the graft conservatively
+ *blocks* it (would drop 56,855 of 70,921). Reattach walks up to the nearest
+ ancestor the classifier cleanly **MATCHED** and attaches the bush there. (First
+ cut used a raw SNP→node index and dumped clades onto A00 — the catalog's junk
+ recurrent links, see "junk links" below, point single SNPs at basal nodes;
+ MATCH dispositions are vetted by SNP-set + subtree scope, so they don't.)
+- **Source tags parameterized** (commit 0e09060) — any source tags its own nodes;
+ the anchor/collision guard excludes only that source's prior graft.
+- **Result `decodingus_hybrid2`: 81,297 nodes, single ISOGG root**, ISOGG-named
+ backbone + decoding-us + full FTDNA depth (70,748 bushes; 16,117 reattached;
+ 173 unanchored; ~19 land on `CT`), source names folded in as aliases, ~42k
+ variants coord-enriched from FTDNA anc/der+position. Spot-verified:
+ `I-BY136871 → I1a3a1b`, `G-FTH55879 → G2a2b2a1a1b1a1`; basal nodes near-empty.
+- **JUNK LINKS — SCRUBBED (commit 7a0487d).** ~1.2k catalog variants were linked
+ to haplogroups across unrelated macro-clades (decoding-us ASR scatter onto
+ A00/H/O; also FTDNA shared-SNP blocks), which nearly broke the FTDNA reattach.
+ `du_db::haplogroup::scrub_recurrent_links` (`tree-init --scrub-recurrent
+ [--apply]`) keeps each variant's primary (most-concentrated) lineage — by tree
+ ancestry, not names — and soft-deletes the off-lineage occurrences (self-name
+ tiebreak for fully-scattered cases, e.g. `CTS9108`). Operates only on
+ `haplogroup_variant`, never on topology. Applied to `decodingus_hybrid2`:
+ cross-macro-clade variants 1,200 → 1, 10,908 links pruned, 81,297 nodes
+ unchanged. The 45 residue on `decodingus_etl` are legitimate basal chains
+ (Y→I1, NO→O) the ancestry criterion correctly keeps.
+
+### mtDNA tree — FTDNA-only foundation (wired, commit b7c9748)
+
+Legacy prod has only Y (2,695 nodes), **zero MT** — no decoding-us mt source, mt
+API, or biosample mt assignment. So the mt-tree is FTDNA-only: load FTDNA's single
+RSRS-rooted mt haplotree as the **foundation** (merge_into into an empty MT
+namespace) — no graft/merge/reattach/scrub.
+- `tree-init --ftdna /Volumes/nas/FTDNA/ftdna_mttree.json --ftdna-foundation
+ --dna MT --apply`. `ftdna_foundation_roots` builds the nested merge tree.
+- **Privacy differs from the Y graft:** `kitsCount==0` on RSRS and internal splits
+ means "no kit terminates here", NOT "private individual" — backbone/internal
+ nodes are kept; only private LEAVES (kits==0, no kept descendants) drop.
+- **NOT scrubbed:** mtDNA homoplasy (16189, 152, …) is real and FTDNA-curated;
+ 1,759 multi-branch variants are legit recurrence, not ASR junk.
+- Variants are RSRS-frame (`G263A`: anc G, der A @263 — rCRS has A there);
+ coordinates `{chrM, position, ancestral, derived}`.
+- Verified on `decodingus_hybrid2`: 4,740 nodes, single RSRS root, 56 backbone
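+ clades, correct PhyloTree topology (L0 …). _[NOTE: the rest of this bullet and the heading of the section that follows were lost from this copy — restore from the original handoff document.]_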
+- **utoipa** kept out of `du-domain` (shared with Navigator/edge); API DTOs +
+ `From` impls live in `du-web/api.rs`. Recursive `HaplogroupNodeDto.children`
+ needs `#[schema(no_recursion)]`.
+- **Management API namespace**: curator/machine endpoints live under **`/manage/*`**
+ (not `/api/v1`) and are excluded from the public OpenAPI doc. Auth is
+ session/`Curator`; unauth → 303 to /login even for JSON endpoints.
+- **`DbError::Conflict` → HTTP 422** (mapped in `du-web/error.rs`).
+- **ETL preserves PKs** via `OVERRIDING SYSTEM VALUE` + `sample_guid`; sequences
+ fixed up post-load; idempotent upserts.
+- **i18n**: adding a UI string requires es/fr entries — `cargo test -p du-web`
+ enforces parity.
+
+## Resume checklist
+
+1. `eval "$(./scripts/test-db.sh up)"` (or set `DATABASE_URL`); confirm `du-pg`
+ container is running.
+2. `cargo test --workspace` (du-domain needs no DB; du-db live tests provision
+ ephemeral DBs from `DATABASE_URL`).
+3. Pick the next arc — launch-critical is **cutover** + the **OAuth Edge test**.
+ Post-launch: the **collaboration/IBD platform** starts at **D1** (the shared
+ encrypted-exchange substrate, `documents/planning/d1-encrypted-edge-exchange.md`),
+ then D2→D4→D5 (platform) / D3 (IBD); the **Catalog** track (haplogroup-discovery
+ automation = D6, multi-test = D7, sequencer-lab = D8) is independent. Remaining
+ doc cleanup: `documents/atmosphere/` still references removed docs (flagged in the
+ triage reports).
+4. Reload the mock if needed: recreate `decodingus_legacy`, load
+ `scripts/mock-legacy.sql`; recreate `decodingus_etl`; run `decodingus-migrate`.
+
+## Reference paths
+
+- **Post-launch design specs:** `documents/planning/d1`–`d5` +
+ `design-roadmap-rust-rewrite.md` (collaboration/IBD platform, no-PII broker).
+- **Design-doc triage reports:** `documents/planning/design-doc-triage-report.md`,
+ `documents/proposals/triage-report.md` (what was removed/reconciled + remaining
+ `atmosphere/` ref cleanup).
+- Prod schema (authoritative for ETL, confirmed current 2026-06): `~/db.schema`
+- Old data dump (may lag — get a fresh one for cutover): `/Volumes/nas/stuff/dump.sql`
+- AT Proto notes: `docs/atproto-oauth-findings.md`, `docs/atproto-edge-reply.md`
+- **Scala↔Rust functional diff catalog: `docs/scala-vs-rust-diff.md`**
+- Navigator atmosphere docs: `/Users/jkane/Development/DUNavigator/documents/atmosphere`
diff --git a/rust/compose.yaml b/rust/compose.yaml
new file mode 100644
index 00000000..d66ba574
--- /dev/null
+++ b/rust/compose.yaml
@@ -0,0 +1,43 @@
+# DecodingUs (Rust) — production-ish compose. Works with Docker or Apple
+# `container compose`. Mirrors the deployment intent of the legacy compose.
+#
+# docker compose up --build (or: container compose up --build)
+
+services:
+  db:
+    image: postgis/postgis:16-3.4
+    environment:
+      # Dev-only fallbacks — export DU_PG_PASSWORD / DU_PG_DB for any real deployment.
+      POSTGRES_PASSWORD: ${DU_PG_PASSWORD:-dev}
+      POSTGRES_DB: ${DU_PG_DB:-decodingus}
+    ports:
+      # Publishes Postgres on the host; drop or restrict this mapping when the
+      # database should only be reachable from the `app` service.
+      - "5432:5432"
+    volumes:
+      - du-pgdata:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres -d ${DU_PG_DB:-decodingus}"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+
+  app:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    environment:
+      DATABASE_URL: postgres://postgres:${DU_PG_PASSWORD:-dev}@db:5432/${DU_PG_DB:-decodingus}?sslmode=disable
+      # `changeme` is only a placeholder so a bare `compose up` works locally;
+      # always supply a real APP_SECRET outside development.
+      APP_SECRET: ${APP_SECRET:-changeme}
+      RUST_LOG: ${RUST_LOG:-info,du_web=debug}
+    ports:
+      - "9000:9000"
+    depends_on:
+      db:
+        # Wait for the db healthcheck (pg_isready) before starting the app.
+        condition: service_healthy
+    restart: unless-stopped
+    healthcheck:
+      # Runs inside the app container, so the image must ship `curl`.
+      test: ["CMD", "curl", "-fsS", "http://localhost:9000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+volumes:
+  du-pgdata:
diff --git a/rust/crates/du-db/Cargo.toml b/rust/crates/du-db/Cargo.toml
new file mode 100644
index 00000000..35e7fa33
--- /dev/null
+++ b/rust/crates/du-db/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "du-db"
+version.workspace = true
+edition.workspace = true
+rust-version.workspace = true
+license.workspace = true
+
+# Data-access layer: SQLx pool + per-aggregate query modules. Runtime-checked
+# queries for now (no live DB in this environment); migrate to compile-time
+# `query_as!` + committed `.sqlx` offline cache once a dev DB is reachable.
+[dependencies]
+du-domain = { workspace = true }
+sqlx = { workspace = true }
+tokio = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+uuid = { workspace = true }
+chrono = { workspace = true }
+thiserror = { workspace = true }
+tracing = { workspace = true }
+
+[dev-dependencies]
+# NOTE(review): both crates are already listed under [dependencies] above, and
+# Cargo makes normal dependencies available to tests — these entries look redundant.
+chrono = { workspace = true }
+du-domain = { workspace = true }
diff --git a/rust/crates/du-db/build.rs b/rust/crates/du-db/build.rs
new file mode 100644
index 00000000..9271537b
--- /dev/null
+++ b/rust/crates/du-db/build.rs
@@ -0,0 +1,7 @@
+// Build script for du-db.
+//
+// `sqlx::migrate!` bakes the migrations directory into the crate at COMPILE time.
+// Cargo does not know about that dependency on its own, so without the directive
+// below a new or edited .sql file would not trigger a rebuild: the embedded set
+// would go stale and migrations would silently fail to apply.
+fn main() {
+    // Directory to watch; any change under it forces du-db to recompile.
+    const MIGRATIONS_DIR: &str = "../../migrations";
+    println!("cargo:rerun-if-changed={MIGRATIONS_DIR}");
+}
diff --git a/rust/crates/du-db/src/age.rs b/rust/crates/du-db/src/age.rs
new file mode 100644
index 00000000..8cb3c851
--- /dev/null
+++ b/rust/crates/du-db/src/age.rs
@@ -0,0 +1,653 @@
+//! Combined branch-age estimation (McDonald 2021 — see
+//! `documents/proposals/branch-age-estimation.md`). Independent evidence terms
+//! (STR variance, SNP counting, genealogical/aDNA anchors) are each stored as a
+//! method-labeled row in `tree.haplogroup_age_estimate`; this module computes the
+//! SNP and genealogical terms and **combines all available terms** by the direct
+//! product of their PDFs (McDonald Eq 1, `P(t|e)=k·∏P(t|eᵢ)`) — preserving each
+//! term's non-Gaussian shape (Poisson skew, STR convergent-mutation tails) rather
+//! than inverse-variance-averaging medians. It writes a `COMBINED` estimate and
+//! gap-fills `tree.haplogroup.tmrca_ybp` (a curated value is never overwritten).
+//! Disjoint terms (no overlapping support) fall back to the inverse-variance
+//! Gaussian combine, which can't annihilate.
+//!
+//! The STR term is produced by [`crate::ystr`]. SNP/genealogical terms are
+//! data-gated: they only emit where private-variant/callable-loci or anchor data
+//! exists (sparse until ETL cutover / curation), but the framework is correct and
+//! extends to the full combined age as that data lands.
+
+use crate::pdf::Pdf;
+use crate::DbError;
+use sqlx::PgPool;
+use std::collections::{BTreeMap, BTreeSet, HashMap};
+
+/// MSY combined SNP mutation rate (SNPs/bp/year, Helgason 2015). This is the rate
+/// the model applies — see `documents/proposals/branch-age-estimation.md`.
+pub const SNP_RATE: f64 = 8.33e-10;
+
+/// Independent cross-check clock from Hallast et al. 2026 (142 population-scale Y
+/// assemblies, BEAST v1.10.4 strict molecular clock on the X-degenerate mask):
+/// **0.76 × 10⁻⁹ sub/site/yr (95% CI 0.67–0.86 × 10⁻⁹)** — ~9% slower than
+/// Helgason. Recorded for provenance/comparison **only**; `recompute_combined_ages`
+/// does *not* swap to it (a slower clock makes every TMRCA ~9% older). Use it to
+/// sanity-check our SNP ages or to bound the rate-uncertainty band, not as the
+/// default. CI bounds: [`HALLAST_RATE_LO`], [`HALLAST_RATE_HI`].
+pub const HALLAST_RATE: f64 = 0.76e-9;
+/// Lower bound of the 95% CI quoted on [`HALLAST_RATE`] (0.67 × 10⁻⁹ sub/site/yr).
+pub const HALLAST_RATE_LO: f64 = 0.67e-9;
+/// Upper bound of the 95% CI quoted on [`HALLAST_RATE`] (0.86 × 10⁻⁹ sub/site/yr).
+pub const HALLAST_RATE_HI: f64 = 0.86e-9;
+
+/// "Before present" reference year (radiocarbon convention) for calendar anchors.
+pub const PRESENT_YEAR: i32 = 1950;
+
+// ── PDF-based tree propagation (McDonald 2021 §2.2, Eq 5–8) ───────────────────
+//
+// The SNP age of a clade is built bottom-up: a node's TMRCA is the product over
+// its children of (the child's own TMRCA convolved with the parent→child branch
+// time), per Eq 8. Each factor is a Poisson age PDF (Eq 3) over the branch's SNP
+// count and callable bp. A node's "formed" age is its TMRCA convolved with its
+// own branch time — i.e. when its lineage split from its parent. This is the pure
+// algorithm; `recompute_combined_ages` (below) supplies the DB-derived inputs.
+//
+// Not yet modelled here (documented follow-ups): the exact b̄ coverage
+// *intersection* across sub-clades (Eq 4 — needs per-sample callable intervals,
+// not just totals), and the Eq 9/10 causality back-correction (the bottom-up
+// convolution already keeps a parent older than its children in the common case).
+
+/// One clade (haplogroup node) of the propagation input.
+///
+/// Clades are held in a flat slice and refer to one another by position in that
+/// slice (see `children`).
+#[derive(Debug, Clone, Default)]
+pub struct Clade {
+    /// SNPs on the edge from this node's parent down to it (`m_{parent→node}`):
+    /// the branch time when this node feeds its parent, and its own "formed" age.
+    /// 0 for a root.
+    pub branch_snps: i64,
+    /// Effective callable bp (`b̄`) over which this clade's SNPs are counted.
+    pub callable_bp: f64,
+    /// Child clade indices (positions in the same clade slice).
+    pub children: Vec<usize>,
+    /// Private-SNP counts of testers sitting directly on this node (terminal tips);
+    /// each contributes a Poisson age factor (tester birth ≈ present is omitted as
+    /// a negligible offset).
+    pub tester_snps: Vec<i64>,
+}
+
+/// A clade's computed age PDFs. Built by [`propagate`], which emits one of these
+/// for each clade that has a TMRCA.
+#[derive(Debug, Clone)]
+pub struct CladeAge {
+ /// TMRCA of the node's sampled descendants.
+ pub tmrca: Pdf,
+ /// When the node's lineage split from its parent (`TMRCA ⊛ branch time`),
+ /// i.e. `tmrca` convolved with the node's own branch-time PDF.
+ pub formed: Pdf,
+}
+
+/// Grid for the whole-tree propagation. Coarser/wider than the PDF default: Y
+/// TMRCAs run from recent surname clades to ~300 ky (A00), so 50-yr bins over
+/// 350 ky keep convolution affordable while spanning the deepest nodes.
+pub const TREE_RESOLUTION_YEARS: f64 = 50.0;
+/// Upper edge of the tree grid, in years. At [`TREE_RESOLUTION_YEARS`] this is
+/// 350,000 / 50 = 7,000 grid steps.
+pub const TREE_MAX_AGE_YEARS: f64 = 350_000.0;
+
+/// Poisson branch-time PDF `P(t | m_branch)` for clade `x`, built from that
+/// clade's branch SNP count and callable bp on a `res`-year grid up to `max_age`.
+fn branch_time(clades: &[Clade], x: usize, mu: f64, res: f64, max_age: f64) -> Pdf {
+    let clade = &clades[x];
+    Pdf::poisson_on(clade.branch_snps, clade.callable_bp, mu, res, max_age)
+}
+
+/// Memoised, depth-first TMRCA computation for clade `i` (Eq 8).
+///
+/// `memo[i]` has three states:
+/// * `None` — not visited yet;
+/// * `Some(None)` — visited, no evidence (also written *before* recursing, so an
+///   accidental cycle terminates instead of recursing forever);
+/// * `Some(Some(pdf))` — the clade's TMRCA PDF.
+///
+/// The TMRCA is the product of one factor per child that has an age (the child's
+/// TMRCA convolved with the child's branch time) and one Poisson factor per
+/// tester sitting directly on the node.
+fn compute_tmrca(
+    i: usize,
+    clades: &[Clade],
+    mu: f64,
+    res: f64,
+    max_age: f64,
+    memo: &mut [Option<Option<Pdf>>],
+) {
+    if memo[i].is_some() {
+        return;
+    }
+    memo[i] = Some(None); // guard against accidental cycles
+    let mut factors: Vec<Pdf> = Vec::new();
+    for &ch in &clades[i].children {
+        compute_tmrca(ch, clades, mu, res, max_age, memo);
+        if let Some(Some(ct)) = &memo[ch] {
+            factors.push(ct.convolve(&branch_time(clades, ch, mu, res, max_age)));
+        }
+    }
+    for &s in &clades[i].tester_snps {
+        factors.push(Pdf::poisson_on(s, clades[i].callable_bp, mu, res, max_age));
+    }
+    // Product of all factors, in order; `None` when there is no evidence at all.
+    let result = factors.into_iter().reduce(|acc, f| acc.multiply(&f));
+    memo[i] = Some(result);
+}
+
+/// Compute every clade's TMRCA + formed-age PDFs bottom-up (Eq 8) on a
+/// `res`-year grid spanning `[0, max_age]`. A clade with no evidence (no children
+/// with ages, no testers) yields `None`.
+///
+/// The result is index-aligned with `clades`: element `i` describes `clades[i]`.
+/// `mu` is the SNP mutation rate handed to every Poisson factor.
+pub fn propagate(clades: &[Clade], mu: f64, res: f64, max_age: f64) -> Vec<Option<CladeAge>> {
+    let mut memo: Vec<Option<Option<Pdf>>> = vec![None; clades.len()];
+    for i in 0..clades.len() {
+        compute_tmrca(i, clades, mu, res, max_age, &mut memo);
+    }
+    memo.into_iter()
+        .enumerate()
+        .map(|(i, slot)| {
+            // `flatten` folds "never computed" and "no evidence" into one `None`.
+            let tmrca = slot.flatten()?;
+            let formed = tmrca.convolve(&branch_time(clades, i, mu, res, max_age));
+            Some(CladeAge { tmrca, formed })
+        })
+        .collect()
+}
+
+/// SNPs in heterochromatic sequence are masked from age counting — they sit
+/// outside the callable denominator (`y_xdegen+y_ampliconic+y_palindromic`) and
+/// the paper excises recurrent regions self-consistently (Appendix A.2/A.3).
+/// Ampliconic and palindromic SNPs are kept (same rate as X-degenerate). This is
+/// a SQL fragment testing `core.variant v` for any `heterochromatin:` overlap.
+const HET_MASK: &str = "NOT EXISTS (SELECT 1 FROM \
+ jsonb_array_elements_text(COALESCE(v.annotations->'region_overlaps','[]'::jsonb)) e \
+ WHERE e LIKE 'heterochromatin:%')";
+
+/// Build the propagation input from the current Y tree: nodes, parent→child
+/// edges, het-masked branch (defining) SNP counts, and per-node tester data
+/// (active private-SNP counts + callable bp). Returns `(clades, haplogroup_ids)`
+/// where `haplogroup_ids[i]` is the DB id of clade `i`.
+///
+/// # Errors
+/// Propagates any query failure (via `?`) as [`DbError`].
+async fn build_clades(pool: &PgPool) -> Result<(Vec<Clade>, Vec<i64>), DbError> {
+    // Stable index over current Y nodes.
+    let ids: Vec<i64> = sqlx::query_scalar(
+        "SELECT id FROM tree.haplogroup \
+         WHERE haplogroup_type='Y_DNA'::core.dna_type AND valid_until IS NULL ORDER BY id",
+    )
+    .fetch_all(pool)
+    .await?;
+    let idx: HashMap<i64, usize> = ids.iter().enumerate().map(|(i, &id)| (id, i)).collect();
+    let mut clades = vec![Clade::default(); ids.len()];
+
+    // Edges → children (a child carries its own branch SNPs).
+    let edges: Vec<(i64, i64)> = sqlx::query_as(
+        "SELECT c.id, p.id FROM tree.haplogroup_relationship r \
+         JOIN tree.haplogroup c ON c.id=r.child_haplogroup_id AND c.valid_until IS NULL \
+         AND c.haplogroup_type='Y_DNA'::core.dna_type \
+         JOIN tree.haplogroup p ON p.id=r.parent_haplogroup_id AND p.valid_until IS NULL \
+         WHERE r.valid_until IS NULL",
+    )
+    .fetch_all(pool)
+    .await?;
+    for (c, p) in edges {
+        // Edges touching a node outside the indexed Y set are ignored.
+        if let (Some(&ci), Some(&pi)) = (idx.get(&c), idx.get(&p)) {
+            clades[pi].children.push(ci);
+        }
+    }
+
+    // Branch defining-SNP counts (het-masked).
+    let branch: Vec<(i64, i64)> = sqlx::query_as(&format!(
+        "SELECT hv.haplogroup_id, count(*)::bigint FROM tree.haplogroup_variant hv \
+         JOIN core.variant v ON v.id=hv.variant_id \
+         WHERE hv.valid_until IS NULL AND {HET_MASK} GROUP BY hv.haplogroup_id"
+    ))
+    .fetch_all(pool)
+    .await?;
+    for (hg, n) in branch {
+        if let Some(&i) = idx.get(&hg) {
+            clades[i].branch_snps = n;
+        }
+    }
+
+    // Testers: per (node, sample) active private-SNP count (het-masked) + that
+    // sample's Y callable bp (xdegen+ampliconic+palindromic, else total).
+    let cbp = "COALESCE(NULLIF(COALESCE(cl.y_xdegen_callable_bp,0)+COALESCE(cl.y_ampliconic_callable_bp,0)\
+               +COALESCE(cl.y_palindromic_callable_bp,0),0), cl.total_callable_bp, 0)";
+    let testers: Vec<(i64, i64, f64)> = sqlx::query_as(&format!(
+        "SELECT pv.terminal_haplogroup_id, count(*)::bigint, max({cbp})::float8 \
+         FROM tree.biosample_private_variant pv \
+         JOIN core.variant v ON v.id=pv.variant_id \
+         LEFT JOIN genomics.biosample_callable_loci cl \
+         ON cl.sample_guid=pv.sample_guid AND cl.chromosome IN ('chrY','Y') \
+         WHERE pv.status='ACTIVE' AND pv.haplogroup_type='Y_DNA'::core.dna_type \
+         AND pv.terminal_haplogroup_id IS NOT NULL AND {HET_MASK} \
+         GROUP BY pv.terminal_haplogroup_id, pv.sample_guid"
+    ))
+    .fetch_all(pool)
+    .await?;
+    let (mut bp_sum, mut bp_cnt) = (vec![0.0f64; ids.len()], vec![0u32; ids.len()]);
+    for (hg, snps, b) in testers {
+        // A tester with no positive callable bp is skipped entirely.
+        if let (Some(&i), true) = (idx.get(&hg), b > 0.0) {
+            clades[i].tester_snps.push(snps);
+            bp_sum[i] += b;
+            bp_cnt[i] += 1;
+        }
+    }
+
+    // Representative b̄ per node: mean of its testers' callable bp, else the
+    // catalog-wide mean (so SNP-less internal branches still get a branch time).
+    // With no usable catalog mean either, fall back to 15 Mbp.
+    let default_b: f64 = sqlx::query_scalar::<_, Option<f64>>(&format!(
+        "SELECT avg({cbp})::float8 FROM genomics.biosample_callable_loci cl WHERE cl.chromosome IN ('chrY','Y')"
+    ))
+    .fetch_one(pool)
+    .await?
+    .filter(|b| *b > 0.0)
+    .unwrap_or(15_000_000.0);
+    for (clade, (&sum, &cnt)) in clades.iter_mut().zip(bp_sum.iter().zip(&bp_cnt)) {
+        clade.callable_bp = if cnt > 0 { sum / f64::from(cnt) } else { default_b };
+    }
+
+    Ok((clades, ids))
+}
+
+/// Inverse-variance combination of independent Gaussian age estimates.
+///
+/// Each input is `(mean_ybp, sigma_ybp)`. With weights `wᵢ = 1/σᵢ²` the result is
+/// `µ = Σ(wᵢµᵢ)/Σwᵢ` and `σ = sqrt(1/Σwᵢ)`, returned as `(mean, sigma)`.
+/// An estimate whose sigma is not positive is given a stand-in sigma of 25% of
+/// its mean (never below 1), so a bare point estimate still contributes weakly.
+/// Returns `None` when the total weight is not positive (e.g. empty input).
+pub fn combine(estimates: &[(f64, f64)]) -> Option<(f64, f64)> {
+    // Running (Σw, Σw·µ), accumulated in input order.
+    let (weight_total, weighted_means) =
+        estimates
+            .iter()
+            .fold((0.0, 0.0), |(w_acc, wx_acc), &(mean, sigma)| {
+                let spread = if sigma > 0.0 {
+                    sigma
+                } else {
+                    (mean * 0.25).max(1.0)
+                };
+                let weight = 1.0 / (spread * spread);
+                (w_acc + weight, wx_acc + weight * mean)
+            });
+    if weight_total <= 0.0 {
+        None
+    } else {
+        Some((weighted_means / weight_total, (1.0 / weight_total).sqrt()))
+    }
+}
+
+#[derive(sqlx::FromRow)]
+struct AnchorRow { // fields mirror the `tree.genealogical_anchor` select in `recompute_combined_ages`
+ haplogroup_id: i64, // branch the anchor's estimate is grouped under
+ date_ce: Option, // year CE, subtracted from `PRESENT_YEAR`; NOTE(review): `Option`'s generic argument is missing (lost in extraction?) — restore from the column type
+ carbon_date_bp: Option, // years before present, used in preference to `date_ce`; NOTE(review): generic argument missing, as above
+ uncertainty_years: Option, // `details->>'uncertainty_years'`, `.parse()`d downstream — presumably `Option<String>`, confirm
+}
+
+#[derive(Debug, Default)]
+pub struct CombineStats { // per-run tallies built up and returned by `recompute_combined_ages`
+ pub snp: usize, // nodes for which a `SNP_POISSON` row was upserted
+ pub genealogical: usize, // branches for which a `GENEALOGICAL` row was upserted
+ pub combined: usize, // branches for which a `COMBINED` row was upserted
+}
+
+/// Recompute the SNP and genealogical age terms, then the COMBINED estimate for
+/// every branch with ≥1 term, gap-filling `tmrca_ybp`. COMBINED is the direct PDF
+/// product (Eq 1) of the SNP TMRCA PDF (propagation), the STR TMRCA PDF
+/// ([`crate::ystr::str_tmrca_pdfs`]), and the genealogical anchor PDF — all on the
+/// shared TREE grid. Full refresh of the computed methods (`SNP_POISSON`,
+/// `GENEALOGICAL`, `COMBINED`); `STR_VARIANCE` (from `ystr`) and curated values are
+/// left intact.
+pub async fn recompute_combined_ages(pool: &PgPool) -> Result {
+ let mut tx = pool.begin().await?;
+ let mut stats = CombineStats::default();
+
+ sqlx::query("DELETE FROM tree.haplogroup_age_estimate WHERE method IN ('SNP_POISSON','GENEALOGICAL','COMBINED')")
+ .execute(&mut *tx)
+ .await?;
+
+ // ── SNP-Poisson term: tree propagation (McDonald Eq 5–8) ──────────────────
+ // Build the clade tree, propagate TMRCA/formed PDFs bottom-up, then store a
+ // SNP_POISSON term per scored node (median + 95% CI of its TMRCA) and gap-fill
+ // `formed_ybp`. The COMBINED step below fills `tmrca_ybp`. Heterochromatic SNPs
+ // are masked from both `m` and (already) the callable denominator (`HET_MASK`).
+ let (clades, ids) = build_clades(pool).await?;
+ let ages = propagate(&clades, SNP_RATE, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS);
+ // Keep each term's actual PDF (on the shared TREE grid) for the Eq-1 product below.
+ let mut snp_pdf: HashMap = HashMap::new();
+ for (i, age) in ages.iter().enumerate() {
+ let Some(age) = age else { continue };
+ let (med, lo, hi) = age.tmrca.ci95();
+ snp_pdf.insert(ids[i], age.tmrca.clone());
+ upsert_estimate_ci(
+ &mut tx,
+ ids[i],
+ "SNP_POISSON",
+ med.round() as i32,
+ lo.round() as i32,
+ hi.round() as i32,
+ clades[i].tester_snps.len() as i32,
+ )
+ .await?;
+ // Node formation age — gap-fill only (never overwrite a curated value).
+ sqlx::query("UPDATE tree.haplogroup SET formed_ybp=$2 WHERE id=$1 AND formed_ybp IS NULL")
+ .bind(ids[i])
+ .bind(age.formed.median().round() as i32)
+ .execute(&mut *tx)
+ .await?;
+ stats.snp += 1;
+ }
+
+ // ── Genealogical / aDNA anchors ───────────────────────────────────────────
+ // Per branch, combine its anchors into one GENEALOGICAL term.
+ let anchors: Vec = sqlx::query_as(
+ "SELECT haplogroup_id, date_ce, carbon_date_bp, \
+ details->>'uncertainty_years' AS uncertainty_years \
+ FROM tree.genealogical_anchor",
+ )
+ .fetch_all(&mut *tx)
+ .await?;
+ let mut by_hg: BTreeMap> = BTreeMap::new();
+ for a in anchors {
+ let ybp = match (a.carbon_date_bp, a.date_ce) {
+ (Some(c), _) => c as f64,
+ (None, Some(d)) => (PRESENT_YEAR - d) as f64,
+ _ => continue,
+ };
+ if ybp < 0.0 {
+ continue;
+ }
+ // Sigma: explicit uncertainty_years, else 10% of the age (min 25 yr).
+ let sigma = a
+ .uncertainty_years
+ .and_then(|u| u.parse::().ok())
+ .filter(|s| *s > 0.0)
+ .unwrap_or((ybp * 0.10).max(25.0));
+ by_hg.entry(a.haplogroup_id).or_default().push((ybp, sigma));
+ }
+ let mut gen_pdf: HashMap = HashMap::new();
+ for (hg, ests) in &by_hg {
+ if let Some((mean, sigma)) = combine(ests) {
+ let rel = if mean > 0.0 { sigma / mean } else { 0.0 };
+ gen_pdf.insert(*hg, Pdf::gaussian_on(mean, sigma, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS));
+ upsert_estimate(&mut tx, *hg, "GENEALOGICAL", mean, rel, None, None).await?;
+ stats.genealogical += 1;
+ }
+ }
+
+ // STR term: tree-propagated TMRCA PDFs on the same grid (the STR_VARIANCE rows
+ // are written separately by `crate::ystr::recompute_signatures`, from the same
+ // computation). Any stored STR_VARIANCE row with no fresh PDF — a curated value,
+ // or one predating profile data — still contributes, reconstructed as a Gaussian.
+ let mut str_pdf = crate::ystr::str_tmrca_pdfs(pool, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS).await?;
+ let str_rows: Vec<(i64, i32, Option, Option)> = sqlx::query_as(
+ "SELECT haplogroup_id, estimate_ybp, ci_low_ybp, ci_high_ybp \
+ FROM tree.haplogroup_age_estimate WHERE method='STR_VARIANCE' AND estimate_ybp IS NOT NULL",
+ )
+ .fetch_all(&mut *tx)
+ .await?;
+ for (hg, est, lo, hi) in str_rows {
+ if str_pdf.contains_key(&hg) {
+ continue;
+ }
+ let mean = est as f64;
+ let sigma = match (lo, hi) {
+ (Some(l), Some(h)) if h > l => (h - l) as f64 / (2.0 * 1.96),
+ _ => (mean * 0.25).max(1.0),
+ };
+ str_pdf.insert(hg, Pdf::gaussian_on(mean, sigma, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS));
+ }
+
+ // ── Combine all method terms per branch (McDonald Eq 1: P(t|all)=k·∏P(t|eᵢ)) ──
+ // Multiply the actual term PDFs rather than inverse-variance-averaging their
+ // medians, so non-Gaussian shape (Poisson skew, STR convergent-mutation tails)
+ // is preserved. If the terms are disjoint (product underflows to zero mass) the
+ // node falls back to the inverse-variance Gaussian combine, which can't annihilate.
+ let mut nodes: BTreeSet = BTreeSet::new();
+ nodes.extend(snp_pdf.keys().chain(gen_pdf.keys()).chain(str_pdf.keys()).copied());
+ for hg in nodes {
+ let factors: Vec<&Pdf> =
+ [snp_pdf.get(&hg), gen_pdf.get(&hg), str_pdf.get(&hg)].into_iter().flatten().collect();
+ let Some((first, rest)) = factors.split_first() else { continue };
+ let product = rest.iter().fold((*first).clone(), |acc, f| acc.multiply(f));
+ let combined = if product.total() > 0.0 {
+ product
+ } else {
+ let params: Vec<(f64, f64)> = factors.iter().map(|p| pdf_gaussian_params(p)).collect();
+ match combine(¶ms) {
+ Some((mean, sigma)) => Pdf::gaussian_on(mean, sigma, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS),
+ None => (*first).clone(),
+ }
+ };
+ let (med, lo, hi) = combined.ci95();
+ upsert_estimate_ci(
+ &mut tx,
+ hg,
+ "COMBINED",
+ med.round() as i32,
+ lo.round() as i32,
+ hi.round() as i32,
+ factors.len() as i32,
+ )
+ .await?;
+ // Gap-fill the authoritative tmrca_ybp (never overwrite a curated value).
+ sqlx::query("UPDATE tree.haplogroup SET tmrca_ybp = $2 WHERE id = $1 AND tmrca_ybp IS NULL")
+ .bind(hg)
+ .bind(med.round() as i32)
+ .execute(&mut *tx)
+ .await?;
+ stats.combined += 1;
+ }
+
+ tx.commit().await?;
+ Ok(stats)
+}
+
+/// Gaussian stand-in `(median, sigma)` for a PDF, with sigma taken as the 95% CI width
+/// over `2 × 1.96` and floored at 1 — used only for the disjoint-terms fallback in the combine.
+fn pdf_gaussian_params(p: &Pdf) -> (f64, f64) {
+    let (median, ci_low, ci_high) = p.ci95();
+    (median, f64::max((ci_high - ci_low) / (2.0 * 1.96), 1.0))
+}
+
+/// Upsert a point estimate whose 95% CI is `years × (1 ∓ 1.96·rel)`, the low bound floored at 0.
+async fn upsert_estimate(
+    tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
+    hg: i64,
+    method: &str,
+    years: f64, // point estimate (written to `estimate_ybp`, rounded to whole years)
+    rel: f64,   // relative 1-sigma error, i.e. sigma / estimate
+    marker_or_snp_count: Option<i32>, // written to `marker_count`
+    sample_count: Option<i32>,
+) -> Result<(), DbError> {
+    let lo = (years * (1.0 - 1.96 * rel)).max(0.0).round() as i32;
+    let hi = (years * (1.0 + 1.96 * rel)).round() as i32;
+    sqlx::query(
+        "INSERT INTO tree.haplogroup_age_estimate \
+         (haplogroup_id, method, estimate_ybp, ci_low_ybp, ci_high_ybp, sample_count, marker_count, computed_at) \
+         VALUES ($1,$2,$3,$4,$5,$6,$7, now()) \
+         ON CONFLICT (haplogroup_id, method) DO UPDATE SET \
+         estimate_ybp=EXCLUDED.estimate_ybp, ci_low_ybp=EXCLUDED.ci_low_ybp, ci_high_ybp=EXCLUDED.ci_high_ybp, \
+         sample_count=EXCLUDED.sample_count, marker_count=EXCLUDED.marker_count, computed_at=now()",
+    )
+    .bind(hg)
+    .bind(method)
+    .bind(years.round() as i32)
+    .bind(lo)
+    .bind(hi)
+    .bind(sample_count)
+    .bind(marker_or_snp_count)
+    .execute(&mut **tx)
+    .await?;
+    Ok(())
+}
+
+/// Upsert with explicit CI bounds (the `SNP_POISSON` and `COMBINED` terms); `term_count` is stored in `sample_count`, `marker_count` is not written.
+async fn upsert_estimate_ci(
+ tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
+ hg: i64,
+ method: &str,
+ est: i32,
+ lo: i32,
+ hi: i32,
+ term_count: i32, // testers for `SNP_POISSON`, multiplied factors for `COMBINED` (see the callers)
+) -> Result<(), DbError> {
+ sqlx::query(
+ "INSERT INTO tree.haplogroup_age_estimate \
+ (haplogroup_id, method, estimate_ybp, ci_low_ybp, ci_high_ybp, sample_count, computed_at) \
+ VALUES ($1,$2,$3,$4,$5,$6, now()) \
+ ON CONFLICT (haplogroup_id, method) DO UPDATE SET \
+ estimate_ybp=EXCLUDED.estimate_ybp, ci_low_ybp=EXCLUDED.ci_low_ybp, ci_high_ybp=EXCLUDED.ci_high_ybp, \
+ sample_count=EXCLUDED.sample_count, computed_at=now()",
+ )
+ .bind(hg)
+ .bind(method)
+ .bind(est)
+ .bind(lo)
+ .bind(hi)
+ .bind(term_count)
+ .execute(&mut **tx)
+ .await?;
+ Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn inverse_variance_combine() {
+ // Two equally-precise estimates → mean between them, sigma tighter than either.
+ let (mean, sigma) = combine(&[(3000.0, 300.0), (3300.0, 300.0)]).unwrap();
+ assert!((mean - 3150.0).abs() < 1.0);
+ assert!(sigma < 300.0 && sigma > 200.0);
+ // A tighter estimate pulls the mean toward it.
+ let (mean2, _) = combine(&[(3000.0, 50.0), (5000.0, 1000.0)]).unwrap();
+ assert!(mean2 < 3100.0, "tight 3000±50 dominates, got {mean2}");
+ assert!(combine(&[]).is_none());
+ }
+
+ // Propagation tests use b·µ = 0.01 (b = 1.25e7, µ = 8e-10) so a Poisson age has
+ // a clean mode of m/(b·µ) = 100·m years.
+ const B: f64 = 1.25e7;
+ const MU: f64 = 8e-10;
+ // Small ages here → use the fine default PDF grid.
+ const RES: f64 = crate::pdf::RESOLUTION_YEARS;
+ const MAXA: f64 = crate::pdf::MAX_AGE_YEARS;
+
+ #[test]
+ fn tmrca_of_single_tester_is_poisson_mode() {
+ let clades = vec![Clade { branch_snps: 0, callable_bp: B, children: vec![], tester_snps: vec![3] }];
+ let ages = propagate(&clades, MU, RES, MAXA);
+ let tmrca = &ages[0].as_ref().unwrap().tmrca;
+ assert!((tmrca.mode() - 300.0).abs() <= 10.0, "mode {}", tmrca.mode());
+ }
+
+ #[test]
+ fn parent_is_older_than_child_and_formed_exceeds_tmrca() {
+ // parent(0) → child(1); child has 2 private SNPs and is 1 SNP below parent.
+ let clades = vec![
+ Clade { branch_snps: 0, callable_bp: B, children: vec![1], tester_snps: vec![] },
+ Clade { branch_snps: 1, callable_bp: B, children: vec![], tester_snps: vec![2] },
+ ];
+ let ages = propagate(&clades, MU, RES, MAXA);
+ let parent = ages[0].as_ref().unwrap();
+ let child = ages[1].as_ref().unwrap();
+ // Parent TMRCA = child TMRCA convolved with the branch → strictly older.
+ assert!(parent.tmrca.median() > child.tmrca.median(), "causality");
+ // A node's formed age (split from parent) is older than its own TMRCA.
+ assert!(child.formed.median() > child.tmrca.median(), "formed > tmrca");
+ }
+
+ #[test]
+ fn more_children_tighten_the_parent_ci() {
+ let leaf = |b| Clade { branch_snps: 1, callable_bp: b, children: vec![], tester_snps: vec![2] };
+ let one = vec![
+ Clade { branch_snps: 0, callable_bp: B, children: vec![1], tester_snps: vec![] },
+ leaf(B),
+ ];
+ let two = vec![
+ Clade { branch_snps: 0, callable_bp: B, children: vec![1, 2], tester_snps: vec![] },
+ leaf(B),
+ leaf(B),
+ ];
+ let width = |ages: &[Option]| { // NOTE(review): `Option` has lost its generic argument (the element type `propagate` returns) — restore it before compiling
+ let (_, lo, hi) = ages[0].as_ref().unwrap().tmrca.ci95();
+ hi - lo
+ };
+ assert!(
+ width(&propagate(&two, MU, RES, MAXA)) < width(&propagate(&one, MU, RES, MAXA)),
+ "two independent sub-clades give a tighter parent TMRCA than one"
+ );
+ }
+
+ // ── DB-gated: full path over a seeded root→mid→leaf tree ──────────────────
+ async fn ins_hg(pool: &PgPool, name: &str) -> i64 {
+ sqlx::query_scalar(
+ "INSERT INTO tree.haplogroup (name, haplogroup_type) \
+ VALUES ($1, 'Y_DNA'::core.dna_type) RETURNING id",
+ )
+ .bind(name)
+ .fetch_one(pool)
+ .await
+ .unwrap()
+ }
+ async fn ins_var(pool: &PgPool, name: &str, het: bool) -> i64 {
+ let ann = if het {
+ serde_json::json!({ "region_overlaps": ["heterochromatin:DYZ1"] })
+ } else {
+ serde_json::json!({})
+ };
+ sqlx::query_scalar(
+ "INSERT INTO core.variant (canonical_name, mutation_type, naming_status, annotations) \
+ VALUES ($1, 'SNP'::core.mutation_type, 'NAMED'::core.naming_status, $2) RETURNING id",
+ )
+ .bind(name)
+ .bind(ann)
+ .fetch_one(pool)
+ .await
+ .unwrap()
+ }
+
+ /// Seed a 3-node chain with one tester, run the whole pipeline, and check the
+ /// het-mask, causality (parent older), and formed > tmrca — against real PG.
+ #[tokio::test]
+ async fn recompute_over_seeded_tree() {
+ let Ok(url) = std::env::var("DATABASE_URL") else {
+ eprintln!("DATABASE_URL unset — skipping seeded age test");
+ return;
+ };
+ if url.is_empty() { // an empty DATABASE_URL also skips the test, but without the message above
+ return;
+ }
+ let db = crate::testing::ephemeral_db(&url).await.expect("ephemeral db");
+ let pool = db.pool().clone();
+ const GUID: &str = "00000000-0000-0000-0000-0000000000aa";
+
+ let (root, mid, leaf) =
+ (ins_hg(&pool, "Y-ROOT").await, ins_hg(&pool, "Y-MID").await, ins_hg(&pool, "Y-LEAF").await);
+ for (p, c) in [(root, mid), (mid, leaf)] {
+ sqlx::query("INSERT INTO tree.haplogroup_relationship (parent_haplogroup_id, child_haplogroup_id) VALUES ($1,$2)")
+ .bind(p).bind(c).execute(&pool).await.unwrap();
+ }
+ // Defining (branch) SNPs: mid 4, leaf 3 — plus one heterochromatic defining
+ // SNP on leaf that must be masked out.
+ for i in 0..4 {
+ let v = ins_var(&pool, &format!("MIDDEF{i}"), false).await;
+ sqlx::query("INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1,$2)").bind(mid).bind(v).execute(&pool).await.unwrap();
+ }
+ for i in 0..3 {
+ let v = ins_var(&pool, &format!("LEAFDEF{i}"), false).await;
+ sqlx::query("INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1,$2)").bind(leaf).bind(v).execute(&pool).await.unwrap();
+ }
+ let hetdef = ins_var(&pool, "LEAFDEFHET", true).await;
+ sqlx::query("INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1,$2)").bind(leaf).bind(hetdef).execute(&pool).await.unwrap();
+
+ // One tester under leaf: 12.5 Mbp callable, 5 private SNPs + 1 het (masked).
+ sqlx::query("INSERT INTO core.biosample (sample_guid, source) VALUES ($1::uuid, 'CITIZEN')").bind(GUID).execute(&pool).await.unwrap();
+ sqlx::query("INSERT INTO genomics.biosample_callable_loci (sample_guid, chromosome, y_xdegen_callable_bp) VALUES ($1::uuid, 'chrY', 12500000)").bind(GUID).execute(&pool).await.unwrap();
+ for i in 0..5 {
+ let v = ins_var(&pool, &format!("PRIV{i}"), false).await;
+ sqlx::query("INSERT INTO tree.biosample_private_variant (sample_guid, variant_id, haplogroup_type, terminal_haplogroup_id) VALUES ($1::uuid,$2,'Y_DNA'::core.dna_type,$3)").bind(GUID).bind(v).bind(leaf).execute(&pool).await.unwrap();
+ }
+ let hv = ins_var(&pool, "PRIVHET", true).await;
+ sqlx::query("INSERT INTO tree.biosample_private_variant (sample_guid, variant_id, haplogroup_type, terminal_haplogroup_id) VALUES ($1::uuid,$2,'Y_DNA'::core.dna_type,$3)").bind(GUID).bind(hv).bind(leaf).execute(&pool).await.unwrap();
+
+ // (a) build_clades: het-masking + structure.
+ let (clades, ids) = build_clades(&pool).await.unwrap();
+ let at = |id: i64| ids.iter().position(|&x| x == id).unwrap();
+ assert_eq!(clades[at(leaf)].tester_snps, vec![5], "het private SNP masked → 5 counted");
+ assert_eq!(clades[at(leaf)].branch_snps, 3, "het defining SNP masked → 3");
+ assert!(clades[at(mid)].children.contains(&at(leaf)));
+ assert!(clades[at(root)].children.contains(&at(mid)));
+ assert!((clades[at(leaf)].callable_bp - 12_500_000.0).abs() < 1.0);
+
+ // (b) full recompute: ages written, causality, formed > tmrca.
+ let stats = recompute_combined_ages(&pool).await.unwrap();
+ assert!(stats.snp >= 3, "root/mid/leaf all scored, got {}", stats.snp);
+ let rows: Vec<(i64, Option, Option)> = sqlx::query_as( // NOTE(review): both `Option`s lack a generic argument; presumably `Option<i32>`, matching the `i32` values `recompute_combined_ages` binds into these columns — confirm
+ "SELECT id, tmrca_ybp, formed_ybp FROM tree.haplogroup WHERE id = ANY($1)",
+ )
+ .bind(vec![root, mid, leaf])
+ .fetch_all(&pool)
+ .await
+ .unwrap();
+ let tmrca = |id: i64| rows.iter().find(|r| r.0 == id).unwrap().1.unwrap();
+ let formed = |id: i64| rows.iter().find(|r| r.0 == id).unwrap().2.unwrap();
+ assert!(tmrca(leaf) > 0, "leaf has a positive TMRCA");
+ assert!(tmrca(root) > tmrca(mid) && tmrca(mid) > tmrca(leaf), "causality: root>mid>leaf");
+ assert!(formed(leaf) >= tmrca(leaf), "leaf formed age ≥ its TMRCA");
+ }
+}
diff --git a/rust/crates/du-db/src/audit.rs b/rust/crates/du-db/src/audit.rs
new file mode 100644
index 00000000..65a8c047
--- /dev/null
+++ b/rust/crates/du-db/src/audit.rs
@@ -0,0 +1,39 @@
+//! Curator action audit trail (`ident.audit_log`). The first runtime writer; the
+//! column set mirrors the legacy backfill in `du-migrate`. Used to record curator
+//! decisions (accept/reject) on consensus proposals.
+
+use crate::DbError;
+use sqlx::PgExecutor;
+use uuid::Uuid;
+
+/// Append a curator action to the audit log. `entity_id` is the catalog row id;
+/// `action` is a short verb (`ACCEPT`/`REJECT`/`CREATE`/`UPDATE`/`DELETE`). `id`
+/// and `created_at` use DB defaults. `executor` is any pool or connection — pass
+/// the surrounding `&mut *tx` to keep the audit row atomic with the mutation it
+/// records. A failed insert is returned as a `DbError` (propagated with `?`).
+#[allow(clippy::too_many_arguments)] // the executor plus one argument per inserted column
+pub async fn log<'e, E: PgExecutor<'e>>(
+ executor: E,
+ user_id: Uuid,
+ entity_type: &str,
+ entity_id: i64,
+ action: &str,
+ old_value: Option<&serde_json::Value>,
+ new_value: Option<&serde_json::Value>,
+ comment: Option<&str>,
+) -> Result<(), DbError> {
+ sqlx::query(
+ "INSERT INTO ident.audit_log (user_id, entity_type, entity_id, action, old_value, new_value, comment) \
+ VALUES ($1, $2, $3, $4, $5, $6, $7)",
+ )
+ .bind(user_id) // binds follow the column order of the INSERT: $1..$7
+ .bind(entity_type)
+ .bind(entity_id)
+ .bind(action)
+ .bind(old_value)
+ .bind(new_value)
+ .bind(comment)
+ .execute(executor)
+ .await?;
+ Ok(())
+}
diff --git a/rust/crates/du-db/src/auth.rs b/rust/crates/du-db/src/auth.rs
new file mode 100644
index 00000000..b6606c3c
--- /dev/null
+++ b/rust/crates/du-db/src/auth.rs
@@ -0,0 +1,132 @@
+//! Authentication/authorization queries against the `ident` schema.
+
+use crate::DbError;
+use du_domain::ids::UserId;
+use sqlx::PgPool;
+use uuid::Uuid;
+
+/// A login credential: the owning user and the stored password hash (None for
+/// OAuth-only logins).
+pub struct Credential {
+    pub user_id: UserId,
+    pub password_hash: Option<String>, // NOTE(review): `<String>` restored after extraction stripped it — confirm
+}
+
+/// Look up a credential by provider key (handle/email) for the `credentials`
+/// provider. Returns None if no such active user/login exists.
+pub async fn find_credential(pool: &PgPool, provider_key: &str) -> Result<Option<Credential>, DbError> {
+    #[derive(sqlx::FromRow)]
+    struct Row {
+        user_id: Uuid,
+        password_hash: Option<String>,
+    }
+    let row: Option<Row> = sqlx::query_as(
+        "SELECT li.user_id, li.password_hash \
+         FROM ident.user_login_info li \
+         JOIN ident.users u ON u.id = li.user_id \
+         WHERE li.provider_id = 'credentials' AND li.provider_key = $1 AND u.is_active = true",
+    )
+    .bind(provider_key)
+    .fetch_optional(pool)
+    .await?;
+    Ok(row.map(|r| Credential {
+        user_id: UserId(r.user_id),
+        password_hash: r.password_hash,
+    }))
+}
+
+/// Find-or-create a user by AT Protocol DID (the OAuth login path), then ensure an
+/// `atproto` login_info row (two separate statements, no transaction). Returns the user id.
+pub async fn upsert_user_by_did(
+    pool: &PgPool,
+    did: &str,
+    handle: Option<&str>,
+    display_name: Option<&str>,
+) -> Result<UserId, DbError> {
+    let id: Uuid = sqlx::query_scalar(
+        "INSERT INTO ident.users (did, handle, display_name) VALUES ($1,$2,$3) \
+         ON CONFLICT (did) DO UPDATE SET \
+         handle = COALESCE(EXCLUDED.handle, ident.users.handle), \
+         display_name = COALESCE(EXCLUDED.display_name, ident.users.display_name), \
+         updated_at = now() \
+         RETURNING id",
+    )
+    .bind(did)
+    .bind(handle)
+    .bind(display_name)
+    .fetch_one(pool)
+    .await?;
+    sqlx::query(
+        "INSERT INTO ident.user_login_info (user_id, provider_id, provider_key) \
+         VALUES ($1, 'atproto', $2) ON CONFLICT (provider_id, provider_key) DO NOTHING",
+    )
+    .bind(id)
+    .bind(did)
+    .execute(pool)
+    .await?;
+    Ok(UserId(id))
+}
+
+/// A user's profile for their own account view.
+pub struct Profile {
+    pub display_name: Option<String>,
+    pub email: Option<String>,
+    pub did: Option<String>,
+    pub handle: Option<String>,
+    pub created_at: chrono::DateTime<chrono::Utc>, // NOTE(review): timezone argument restored as `Utc` — confirm
+}
+
+/// Update a user's display name. Returns whether a row changed.
+pub async fn update_display_name(pool: &PgPool, user_id: UserId, name: &str) -> Result<bool, DbError> {
+    let n = sqlx::query("UPDATE ident.users SET display_name = $2, updated_at = now() WHERE id = $1")
+        .bind(user_id.0)
+        .bind(name)
+        .execute(pool)
+        .await?
+        .rows_affected();
+    Ok(n > 0)
+}
+
+/// Fetch a user's profile fields (None if the user no longer exists).
+pub async fn profile(pool: &PgPool, user_id: UserId) -> Result<Option<Profile>, DbError> {
+    #[derive(sqlx::FromRow)]
+    struct Row {
+        display_name: Option<String>,
+        email: Option<String>,
+        did: Option<String>,
+        handle: Option<String>,
+        created_at: chrono::DateTime<chrono::Utc>,
+    }
+    let row: Option<Row> = sqlx::query_as(
+        "SELECT display_name, email::text AS email, did, handle, created_at \
+         FROM ident.users WHERE id = $1",
+    )
+    .bind(user_id.0)
+    .fetch_optional(pool)
+    .await?;
+    Ok(row.map(|r| Profile {
+        display_name: r.display_name,
+        email: r.email,
+        did: r.did,
+        handle: r.handle,
+        created_at: r.created_at,
+    }))
+}
+
+/// The display name + role names (sorted by name) for a user (for the session).
+pub async fn session_info(pool: &PgPool, user_id: UserId) -> Result<(Option<String>, Vec<String>), DbError> {
+    let display_name: Option<String> =
+        sqlx::query_scalar("SELECT display_name FROM ident.users WHERE id = $1")
+            .bind(user_id.0)
+            .fetch_optional(pool)
+            .await?
+            .flatten(); // a missing user and a NULL display name both end up as `None`
+    let roles: Vec<String> = sqlx::query_scalar(
+        "SELECT r.name FROM ident.user_roles ur \
+         JOIN ident.roles r ON r.id = ur.role_id WHERE ur.user_id = $1 ORDER BY r.name",
+    )
+    .bind(user_id.0)
+    .fetch_all(pool)
+    .await?;
+    Ok((display_name, roles))
+}
diff --git a/rust/crates/du-db/src/biosample.rs b/rust/crates/du-db/src/biosample.rs
new file mode 100644
index 00000000..e9c3fb88
--- /dev/null
+++ b/rust/crates/du-db/src/biosample.rs
@@ -0,0 +1,693 @@
+//! Queries for the unified `core.biosample`.
+
+use crate::{parse_pg_enum, DbError, Page};
+use du_domain::biosample::{Biosample, GeoPoint};
+use du_domain::enums::{BiosampleSource, DnaType};
+use du_domain::ids::{PublicationId, SampleGuid};
+use sqlx::PgPool;
+use uuid::Uuid;
+
+#[derive(sqlx::FromRow)]
+struct BiosampleRow {
+    sample_guid: Uuid,
+    source: String, // selected as `source::text`; parsed into the enum by `into_domain`
+    accession: Option<String>,
+    alias: Option<String>,
+    description: Option<String>,
+    center_name: Option<String>,
+    locked: bool,
+    source_attrs: serde_json::Value,
+    atproto: Option<serde_json::Value>, // NOTE(review): argument restored as raw JSON like `source_attrs` — confirm
+}
+
+impl BiosampleRow {
+    fn into_domain(self) -> Result<Biosample, DbError> { // fails only if `source` is not a known enum label
+        Ok(Biosample {
+            sample_guid: SampleGuid(self.sample_guid),
+            source: parse_pg_enum(&self.source, "source")?,
+            accession: self.accession,
+            alias: self.alias,
+            description: self.description,
+            center_name: self.center_name,
+            locked: self.locked,
+            source_attrs: self.source_attrs,
+            atproto: self.atproto,
+        })
+    }
+}
+
+const SELECT: &str = "SELECT sample_guid, source::text AS source, accession, alias, description, \
+ center_name, locked, source_attrs, atproto FROM core.biosample WHERE deleted = false"; // ends in a WHERE clause, so users append `AND …` (see `get_by_guid`)
+
+/// Fetch one non-deleted biosample by guid (`None` when there is no such row).
+pub async fn get_by_guid(pool: &PgPool, guid: SampleGuid) -> Result<Option<Biosample>, DbError> {
+    let sql = format!("{SELECT} AND sample_guid = $1");
+    let row: Option<BiosampleRow> =
+        sqlx::query_as(&sql).bind(guid.0).fetch_optional(pool).await?;
+    row.map(BiosampleRow::into_domain).transpose()
+}
+
+/// All mappable biosample locations. PostGIS `ST_X`/`ST_Y` extract lon/lat from
+/// the donor's `geocoord` (geometry Point, 4326). Backs the biosample map.
+pub async fn geo_points(pool: &PgPool) -> Result<Vec<GeoPoint>, DbError> {
+    #[derive(sqlx::FromRow)]
+    struct GeoRow {
+        lat: f64,
+        lon: f64,
+        accession: Option<String>,
+        source: String,
+    }
+    let rows: Vec<GeoRow> = sqlx::query_as(
+        "SELECT ST_Y(d.geocoord) AS lat, ST_X(d.geocoord) AS lon, b.accession, \
+         b.source::text AS source \
+         FROM core.biosample b JOIN core.specimen_donor d ON d.id = b.donor_id \
+         WHERE d.geocoord IS NOT NULL AND b.deleted = false",
+    )
+    .fetch_all(pool)
+    .await?;
+    rows.into_iter()
+        .map(|r| {
+            Ok(GeoPoint {
+                lat: r.lat,
+                lon: r.lon,
+                accession: r.accession,
+                source: parse_pg_enum(&r.source, "source")?,
+            })
+        })
+        .collect()
+}
+
+/// Paginated biosamples linked to a publication (the biosample report).
+pub async fn for_publication(
+    pool: &PgPool,
+    publication_id: PublicationId,
+    page: i64,
+    page_size: i64,
+) -> Result<Page<Biosample>, DbError> {
+    // Clamp first and derive OFFSET from the clamped size, so the rows skipped match the
+    // `page_size` reported back (a no-op if `Page::offset` already clamps the same way).
+    let limit = page_size.clamp(1, 200);
+    let offset = Page::<()>::offset(page, limit);
+
+    let total: i64 = sqlx::query_scalar(
+        "SELECT count(*) FROM pubs.publication_biosample pb \
+         JOIN core.biosample b ON b.sample_guid = pb.sample_guid \
+         WHERE pb.publication_id = $1 AND b.deleted = false",
+    )
+    .bind(publication_id.0)
+    .fetch_one(pool)
+    .await?;
+
+    let rows: Vec<BiosampleRow> = sqlx::query_as(
+        "SELECT b.sample_guid, b.source::text AS source, b.accession, b.alias, b.description, \
+         b.center_name, b.locked, b.source_attrs, b.atproto \
+         FROM pubs.publication_biosample pb \
+         JOIN core.biosample b ON b.sample_guid = pb.sample_guid \
+         WHERE pb.publication_id = $1 AND b.deleted = false \
+         ORDER BY b.accession NULLS LAST, b.sample_guid LIMIT $2 OFFSET $3",
+    )
+    .bind(publication_id.0)
+    .bind(limit)
+    .bind(offset)
+    .fetch_all(pool)
+    .await?;
+
+    let items =
+        rows.into_iter().map(BiosampleRow::into_domain).collect::<Result<Vec<_>, _>>()?;
+    Ok(Page { items, total, page: page.max(1), page_size: limit })
+}
+
+// ── Public per-sample report (unified read path) ──────────────────────────────
+// The canonical `core.biosample` (identity, the `is_public` gate, publications)
+// joined to the federated analytics mirror (`fed.*`) via `atproto.uri ↔ *.biosample_ref`.
+// Callers never touch `fed.*` directly — this is the seam the eventual full
+// core/fed consolidation collapses into (only the query bodies change).
+
+/// Origin of a sample's haplogroup call — provenance shown to the reader.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] // field-less tag: cheap to copy and compare
+pub enum HaplogroupCallOrigin {
+ /// `fed.haplogroup_reconciliation` — the donor's call reconciled across all its
+ /// sequencing technologies (the authoritative cross-technology consensus).
+ Reconciled,
+ /// `fed.biosample.y/mt_haplogroup` (a single Navigator call, not reconciled).
+ FedConsensus,
+ /// `core.biosample.original_haplogroups` (per-publication original call).
+ Original,
+}
+
+/// A called haplogroup name plus its lineage. The phylogenetic pathway is
+/// resolved separately by [`crate::haplogroup::pathway`] so the SQL layer stays
+/// free of tree-walking. The reliability fields are populated only for a
+/// `Reconciled` call (the cross-technology consensus).
+#[derive(Debug, Clone)]
+pub struct HaplogroupCall { // NOTE(review): the bare `Option`s below have lost their generic argument (extraction damage?) — restore them; they must match `ReconCall`
+ pub name: String,
+ pub dna_type: DnaType,
+ pub origin: HaplogroupCallOrigin,
+ /// Consensus confidence ∈ [0,1] (reconciled calls only).
+ pub confidence: Option,
+ /// Number of sequencing runs reconciled into the consensus.
+ pub run_count: Option,
+ /// SNP concordance across the reconciled runs ∈ [0,1].
+ pub snp_concordance: Option,
+ /// `COMPATIBLE` / `MINOR_DIVERGENCE` / `INCOMPATIBLE` …
+ pub compatibility_level: Option,
+}
+
+/// A reconciliation consensus call, before it's lifted to a [`HaplogroupCall`].
+struct ReconCall { // NOTE(review): the bare `Option`s have lost their generic argument — restore them to the same types as the matching `HaplogroupCall` fields
+ name: String,
+ confidence: Option,
+ run_count: Option,
+ snp_concordance: Option,
+ compatibility_level: Option,
+}
+
+/// Lift a reconciliation consensus to a `Reconciled`-origin [`HaplogroupCall`] (`None` stays `None`).
+fn reconciled_call(r: Option<ReconCall>, dna_type: DnaType) -> Option<HaplogroupCall> {
+    r.map(|r| HaplogroupCall {
+        name: r.name,
+        dna_type,
+        origin: HaplogroupCallOrigin::Reconciled,
+        confidence: r.confidence,
+        run_count: r.run_count,
+        snp_concordance: r.snp_concordance,
+        compatibility_level: r.compatibility_level,
+    })
+}
+
+/// WGS84 origin point (from the donor's `geocoord`).
+#[derive(Debug, Clone, Copy)]
+pub struct LatLon {
+ pub lat: f64, // presumably `ST_Y(geocoord)`, as in `geo_points` — confirm against the report query
+ pub lon: f64, // presumably `ST_X(geocoord)`, as in `geo_points` — confirm against the report query
+}
+
+#[derive(Debug, Clone)]
+pub struct ReportIdentity { // NOTE(review): `Option` arguments restored after extraction stripped them — confirm
+    pub sample_guid: SampleGuid,
+    pub source: BiosampleSource,
+    pub accession: Option<String>,
+    pub alias: Option<String>,
+    pub description: Option<String>,
+    pub center_name: Option<String>,
+    /// The Postgres `biological_sex` label (`MALE`/`FEMALE`/`INTERSEX`), as text.
+    pub sex: Option<String>,
+    pub origin: Option<LatLon>,
+    pub is_public: bool,
+    /// atproto link present AND a matching `fed.biosample` row was found.
+    pub is_federated: bool,
+}
+
+#[derive(Debug, Clone)]
+pub struct SequencingRun { // NOTE(review): every bare `Option` below has lost its generic argument (extraction damage?) — restore from the query that fills this struct
+ pub platform_name: Option,
+ pub instrument_model: Option,
+ pub test_type: Option,
+ pub library_layout: Option,
+ pub total_reads: Option,
+ pub read_length: Option,
+ pub mean_insert_size: Option,
+ /// at:// uri of the run (join key to its coverage summary).
+ pub at_uri: String,
+}
+
+#[derive(Debug, Clone)]
+pub struct CoverageSummary { // NOTE(review): every bare `Option` below has lost its generic argument (extraction damage?) — restore from the query that fills this struct
+ pub reference_build: Option,
+ pub aligner: Option,
+ pub mean_coverage: Option,
+ pub median_coverage: Option,
+ pub pct_10x: Option,
+ pub pct_20x: Option,
+ pub pct_30x: Option,
+ /// at:// uri of the sequencing run this coverage belongs to (may be NULL).
+ pub sequence_run_ref: Option,
+ /// The run's test type, when resolvable (drives the conformance check).
+ pub test_type: Option,
+ /// Advertised minimum depth for the test type (`test_type_definition`), if set.
+ pub expected_min_depth: Option,
+ /// The empirical cohort median depth for the test type (`test_type_coverage_norm`).
+ pub norm_median_depth: Option,
+ /// Conformance vs. the cohort norm (or the advertised spec when no norm exists; see `conformance`): `BELOW` /
+ /// `AT` / `ABOVE`, or `None` when there's nothing to compare against.
+ pub conformance: Option,
+}
+
+/// Classify a sample's aligned mean depth against the **empirical cohort norm**
+/// for its test type (preferred), falling back to the advertised spec only when no
+/// cohort norm exists yet. The cohort norm is the fair baseline: an advertised
+/// "30× WGS" is really a raw-yield spec (~90 Gb of reads), which aligns to less than
+/// 30× after QC/dedup, and D2C lab products don't target 30× aligned at all — so
+/// comparing aligned depth to a literal advertised number would mislabel them. The
+/// cohort norm is measured in the same aligned-depth units and reflects what each
+/// test type actually delivers. ±5% of the baseline counts as `AT`.
+fn conformance(mean: Option<f64>, expected: Option<f64>, norm: Option<f64>) -> Option<String> {
+    let mean = mean?;
+    let baseline = norm.or(expected)?;
+    // No usable baseline (non-positive or NaN) or a NaN depth: nothing to compare.
+    if baseline.is_nan() || baseline <= 0.0 || mean.is_nan() {
+        return None;
+    }
+    let label = if mean < baseline * 0.95 {
+        "BELOW"
+    } else if mean > baseline * 1.05 {
+        "ABOVE"
+    } else {
+        "AT"
+    };
+    Some(label.to_string())
+}
+
+#[derive(Debug, Clone)]
+pub struct AncestryBreakdown { // NOTE(review): the three bare `Option`s have lost their generic argument (extraction damage?) — restore from the query that fills this struct
+ pub analysis_method: Option,
+ pub panel_type: Option,
+ pub confidence_level: Option,
+ /// Continental rollup: `[{superPopulation, percentage}]`.
+ pub super_populations: serde_json::Value,
+ /// Sub-continental percentages (payload shape not asserted — render defensively).
+ pub components: serde_json::Value,
+}
+
+#[derive(Debug, Clone)]
+pub struct ReportPublication { // NOTE(review): the bare `Option`s have lost their generic argument (extraction damage?) — restore from the publication columns
+ pub id: PublicationId,
+ pub title: String,
+ pub doi: Option,
+ pub url: Option,
+ pub publication_date: Option,
+}
+
+/// Everything the public per-sample report needs, assembled from the canonical
+/// biosample plus its federated analytics. `is_public` is carried (not filtered)
+/// so the web layer can let curators preview private samples; every public
+/// surface MUST check `identity.is_public` itself.
+#[derive(Debug, Clone)]
+pub struct SampleReport { // NOTE(review): generic arguments restored from the sibling report types — confirm
+    pub identity: ReportIdentity,
+    pub y: Option<HaplogroupCall>,
+    pub mt: Option<HaplogroupCall>,
+    pub sequencing: Vec<SequencingRun>,
+    pub coverage: Vec<CoverageSummary>,
+    pub ancestry: Option<AncestryBreakdown>,
+    pub publications: Vec<ReportPublication>,
+}
+
+/// Pick the first non-null call from an `original_haplogroups` JSONB array,
+/// tolerating both shapes (standard `{y, mt, y_result, mt_result}` and citizen
+/// `{y_result, mt_result}`, all keys null-stripped): prefer `primary`, else `fallback`.
+pub(crate) fn pick_original_call(arr: &serde_json::Value, primary: &str, fallback: &str) -> Option<String> {
+    let entries = arr.as_array()?; // anything that is not a JSON array yields `None`
+    entries.iter().find_map(|e| {
+        let take = |k: &str| {
+            e.get(k)
+                .and_then(serde_json::Value::as_str)
+                .map(str::trim)
+                .filter(|s| !s.is_empty()) // blank strings count as missing
+                .map(str::to_string)
+        };
+        take(primary).or_else(|| take(fallback))
+    })
+}
+
+/// Resolve an accession, alias, or `sample_guid` string to a single `sample_guid`.
+///
+/// Input that parses as a UUID is looked up by guid only. Anything else is
+/// matched case-insensitively against `accession` and `alias`; when several
+/// rows match, public rows win and ties break on the smallest guid so the
+/// result is deterministic. Deleted rows never match on either path.
+///
+/// Returns `Ok(None)` when nothing matches.
+pub async fn resolve_guid(pool: &PgPool, identifier: &str) -> Result<Option<SampleGuid>, DbError> {
+    let id = identifier.trim();
+    if let Ok(uuid) = Uuid::parse_str(id) {
+        let exists: Option<Uuid> = sqlx::query_scalar(
+            "SELECT sample_guid FROM core.biosample WHERE sample_guid = $1 AND deleted = false",
+        )
+        .bind(uuid)
+        .fetch_optional(pool)
+        .await?;
+        return Ok(exists.map(SampleGuid));
+    }
+    let guid: Option<Uuid> = sqlx::query_scalar(
+        "SELECT sample_guid FROM core.biosample \
+         WHERE deleted = false AND (lower(accession) = lower($1) OR lower(alias) = lower($1)) \
+         ORDER BY is_public DESC, sample_guid LIMIT 1",
+    )
+    .bind(id)
+    .fetch_optional(pool)
+    .await?;
+    Ok(guid.map(SampleGuid))
+}
+
+/// Assemble the report for one sample by guid, or `None` if it doesn't exist /
+/// is deleted. Does NOT filter on `is_public` — the caller gates visibility.
+///
+/// Runs a fixed sequence of read-only queries against `pool` (identity,
+/// federated haplogroups, reconciliation, sequencing, coverage, ancestry,
+/// publications) and propagates the first database error. The federated
+/// queries are skipped when the biosample has no `atproto` uri / repo DID.
+pub async fn report_by_guid(pool: &PgPool, guid: SampleGuid) -> Result, DbError> {
+ // ── Q1: identity + gate flags (joins the donor for sex/origin) ──
+ #[derive(sqlx::FromRow)]
+ struct IdRow {
+ sample_guid: Uuid,
+ source: String,
+ accession: Option,
+ alias: Option,
+ description: Option,
+ center_name: Option,
+ is_public: bool,
+ at_uri: Option,
+ repo_did: Option,
+ original_haplogroups: serde_json::Value,
+ sex: Option,
+ lat: Option,
+ lon: Option,
+ }
+ let id_row: Option = sqlx::query_as(
+ "SELECT b.sample_guid, b.source::text AS source, b.accession, b.alias, b.description, \
+ b.center_name, b.is_public, b.atproto->>'uri' AS at_uri, b.atproto->>'repo_did' AS repo_did, \
+ b.original_haplogroups, \
+ d.sex::text AS sex, ST_Y(d.geocoord) AS lat, ST_X(d.geocoord) AS lon \
+ FROM core.biosample b \
+ LEFT JOIN core.specimen_donor d ON d.id = b.donor_id \
+ WHERE b.sample_guid = $1 AND b.deleted = false",
+ )
+ .bind(guid.0)
+ .fetch_optional(pool)
+ .await?;
+ // Missing or deleted sample: nothing to report.
+ let Some(idr) = id_row else { return Ok(None) };
+
+ // ── Q2: federated consensus haplogroups (only when atproto-linked) ──
+ let mut fed_y: Option = None;
+ let mut fed_mt: Option = None;
+ // Set once any federated record (biosample row or reconciliation) is found.
+ let mut is_federated = false;
+ if let Some(at_uri) = idr.at_uri.as_deref() {
+ let fed: Option<(Option, Option)> =
+ sqlx::query_as("SELECT y_haplogroup, mt_haplogroup FROM fed.biosample WHERE at_uri = $1")
+ .bind(at_uri)
+ .fetch_optional(pool)
+ .await?;
+ if let Some((y, mt)) = fed {
+ is_federated = true;
+ fed_y = y;
+ fed_mt = mt;
+ }
+ }
+
+ // ── Q2b: the cross-technology consensus (the authoritative call). Keyed by the
+ // citizen's repo DID = the reconciliation publisher's DID; pick the best per arm.
+ let mut recon_y: Option = None;
+ let mut recon_mt: Option = None;
+ if let Some(repo_did) = idr.repo_did.as_deref() {
+ #[derive(sqlx::FromRow)]
+ struct ReconRow {
+ dna_type: Option,
+ consensus_haplogroup: Option,
+ confidence: Option,
+ run_count: Option,
+ snp_concordance: Option,
+ compatibility_level: Option,
+ }
+ // DISTINCT ON keeps one row per dna_type: highest run_count, then newest time_us.
+ let rows: Vec = sqlx::query_as(
+ "SELECT DISTINCT ON (dna_type) dna_type, consensus_haplogroup, confidence, run_count, \
+ snp_concordance, compatibility_level \
+ FROM fed.haplogroup_reconciliation \
+ WHERE did = $1 AND consensus_haplogroup IS NOT NULL \
+ ORDER BY dna_type, run_count DESC NULLS LAST, time_us DESC",
+ )
+ .bind(repo_did)
+ .fetch_all(pool)
+ .await?;
+ for r in rows {
+ let call = r.consensus_haplogroup.map(|name| ReconCall {
+ name,
+ confidence: r.confidence,
+ run_count: r.run_count,
+ snp_concordance: r.snp_concordance,
+ compatibility_level: r.compatibility_level,
+ });
+ // Rows with any other (or missing) dna_type are ignored.
+ match r.dna_type.as_deref() {
+ Some("Y_DNA") => recon_y = call,
+ Some("MT_DNA") => recon_mt = call,
+ _ => {}
+ }
+ }
+ if recon_y.is_some() || recon_mt.is_some() {
+ is_federated = true;
+ }
+ }
+
+ // Call precedence: cross-technology consensus, else the single federated call,
+ // else the newest original publication call.
+ // NOTE(review): `pick_original_call` takes the first qualifying array entry;
+ // "newest" holds only if the array is stored newest-first — confirm.
+ let y = reconciled_call(recon_y, DnaType::YDna)
+ .or_else(|| {
+ fed_y.map(|name| HaplogroupCall {
+ name,
+ dna_type: DnaType::YDna,
+ origin: HaplogroupCallOrigin::FedConsensus,
+ confidence: None,
+ run_count: None,
+ snp_concordance: None,
+ compatibility_level: None,
+ })
+ })
+ .or_else(|| {
+ pick_original_call(&idr.original_haplogroups, "y", "y_result").map(|name| HaplogroupCall {
+ name,
+ dna_type: DnaType::YDna,
+ origin: HaplogroupCallOrigin::Original,
+ confidence: None,
+ run_count: None,
+ snp_concordance: None,
+ compatibility_level: None,
+ })
+ });
+ let mt = reconciled_call(recon_mt, DnaType::MtDna)
+ .or_else(|| {
+ fed_mt.map(|name| HaplogroupCall {
+ name,
+ dna_type: DnaType::MtDna,
+ origin: HaplogroupCallOrigin::FedConsensus,
+ confidence: None,
+ run_count: None,
+ snp_concordance: None,
+ compatibility_level: None,
+ })
+ })
+ .or_else(|| {
+ pick_original_call(&idr.original_haplogroups, "mt", "mt_result").map(|name| HaplogroupCall {
+ name,
+ dna_type: DnaType::MtDna,
+ origin: HaplogroupCallOrigin::Original,
+ confidence: None,
+ run_count: None,
+ snp_concordance: None,
+ compatibility_level: None,
+ })
+ });
+
+ // ── Q3/Q4/Q5: federated sequencing, coverage, ancestry (only when linked) ──
+ let mut sequencing = Vec::new();
+ let mut coverage = Vec::new();
+ let mut ancestry = None;
+ if let Some(at_uri) = idr.at_uri.as_deref() {
+ #[derive(sqlx::FromRow)]
+ struct SeqRow {
+ at_uri: String,
+ platform_name: Option,
+ instrument_model: Option,
+ test_type: Option,
+ library_layout: Option,
+ total_reads: Option,
+ read_length: Option,
+ mean_insert_size: Option,
+ }
+ // Q3: sequencing runs for this biosample, newest record first.
+ let seq: Vec = sqlx::query_as(
+ "SELECT at_uri, platform_name, instrument_model, test_type, library_layout, \
+ total_reads, read_length, mean_insert_size \
+ FROM fed.sequencerun WHERE biosample_ref = $1 ORDER BY record_created_at DESC NULLS LAST",
+ )
+ .bind(at_uri)
+ .fetch_all(pool)
+ .await?;
+ sequencing = seq
+ .into_iter()
+ .map(|r| SequencingRun {
+ platform_name: r.platform_name,
+ instrument_model: r.instrument_model,
+ test_type: r.test_type,
+ library_layout: r.library_layout,
+ total_reads: r.total_reads,
+ read_length: r.read_length,
+ mean_insert_size: r.mean_insert_size,
+ at_uri: r.at_uri,
+ })
+ .collect();
+
+ #[derive(sqlx::FromRow)]
+ struct CovRow {
+ reference_build: Option,
+ aligner: Option,
+ mean_coverage: Option,
+ median_coverage: Option,
+ pct_10x: Option,
+ pct_20x: Option,
+ pct_30x: Option,
+ sequence_run_ref: Option,
+ test_type: Option,
+ expected_min_depth: Option,
+ norm_median_depth: Option,
+ }
+ // Resolve each coverage row's test type (via its run), the advertised spec
+ // (test_type_definition, opportunistic), and the empirical cohort norm.
+ // NOTE(review): the definition join is case-insensitive but the norm join is
+ // an exact match on test_type — confirm the difference is intended.
+ let cov: Vec = sqlx::query_as(
+ "SELECT cs.reference_build, cs.aligner, cs.mean_coverage, cs.median_coverage, \
+ cs.pct_10x, cs.pct_20x, cs.pct_30x, cs.sequence_run_ref, \
+ sr.test_type AS test_type, ttd.expected_min_depth AS expected_min_depth, \
+ n.median_mean_depth AS norm_median_depth \
+ FROM fed.coverage_summary cs \
+ LEFT JOIN fed.sequencerun sr ON sr.at_uri = cs.sequence_run_ref \
+ LEFT JOIN genomics.test_type_definition ttd ON upper(ttd.code) = upper(sr.test_type) \
+ LEFT JOIN genomics.test_type_coverage_norm n ON n.test_type = sr.test_type \
+ WHERE cs.biosample_ref = $1 ORDER BY cs.mean_coverage DESC NULLS LAST",
+ )
+ .bind(at_uri)
+ .fetch_all(pool)
+ .await?;
+ coverage = cov
+ .into_iter()
+ .map(|r| CoverageSummary {
+ // Label the mean depth against the cohort norm, falling back to the spec.
+ conformance: conformance(r.mean_coverage, r.expected_min_depth, r.norm_median_depth),
+ reference_build: r.reference_build,
+ aligner: r.aligner,
+ mean_coverage: r.mean_coverage,
+ median_coverage: r.median_coverage,
+ pct_10x: r.pct_10x,
+ pct_20x: r.pct_20x,
+ pct_30x: r.pct_30x,
+ sequence_run_ref: r.sequence_run_ref,
+ test_type: r.test_type,
+ expected_min_depth: r.expected_min_depth,
+ norm_median_depth: r.norm_median_depth,
+ })
+ .collect();
+
+ #[derive(sqlx::FromRow)]
+ struct AncRow {
+ analysis_method: Option,
+ panel_type: Option,
+ confidence_level: Option,
+ super_population_summary: serde_json::Value,
+ components: serde_json::Value,
+ }
+ // Q5: only the newest ancestry breakdown is reported.
+ let anc: Option = sqlx::query_as(
+ "SELECT analysis_method, panel_type, confidence_level, super_population_summary, components \
+ FROM fed.population_breakdown WHERE biosample_ref = $1 \
+ ORDER BY record_created_at DESC NULLS LAST LIMIT 1",
+ )
+ .bind(at_uri)
+ .fetch_optional(pool)
+ .await?;
+ ancestry = anc.map(|r| AncestryBreakdown {
+ analysis_method: r.analysis_method,
+ panel_type: r.panel_type,
+ confidence_level: r.confidence_level,
+ super_populations: r.super_population_summary,
+ components: r.components,
+ });
+ }
+
+ // ── Q6: source publications ──
+ #[derive(sqlx::FromRow)]
+ struct PubRow {
+ id: i64,
+ title: String,
+ doi: Option,
+ url: Option,
+ publication_date: Option,
+ }
+ let pubs: Vec = sqlx::query_as(
+ "SELECT p.id, p.title, p.doi, p.url, p.publication_date \
+ FROM pubs.publication_biosample pb JOIN pubs.publication p ON p.id = pb.publication_id \
+ WHERE pb.sample_guid = $1 ORDER BY p.publication_date DESC NULLS LAST",
+ )
+ .bind(guid.0)
+ .fetch_all(pool)
+ .await?;
+ let publications = pubs
+ .into_iter()
+ .map(|p| ReportPublication {
+ id: PublicationId(p.id),
+ title: p.title,
+ doi: p.doi,
+ url: p.url,
+ publication_date: p.publication_date,
+ })
+ .collect();
+
+ // An origin is reported only when both coordinates are present.
+ let origin = match (idr.lat, idr.lon) {
+ (Some(lat), Some(lon)) => Some(LatLon { lat, lon }),
+ _ => None,
+ };
+ let identity = ReportIdentity {
+ sample_guid: SampleGuid(idr.sample_guid),
+ source: parse_pg_enum(&idr.source, "source")?,
+ accession: idr.accession,
+ alias: idr.alias,
+ description: idr.description,
+ center_name: idr.center_name,
+ sex: idr.sex,
+ origin,
+ is_public: idr.is_public,
+ is_federated,
+ };
+ Ok(Some(SampleReport { identity, y, mt, sequencing, coverage, ancestry, publications }))
+}
+
+/// Resolve an identifier (accession/alias/guid) and assemble its report.
+///
+/// Returns `Ok(None)` when the identifier resolves to no live sample. Like
+/// `report_by_guid`, this does not filter on `is_public`.
+pub async fn report(pool: &PgPool, identifier: &str) -> Result<Option<SampleReport>, DbError> {
+    let Some(guid) = resolve_guid(pool, identifier).await? else {
+        return Ok(None);
+    };
+    report_by_guid(pool, guid).await
+}
+
+/// Set the public-visibility flag on a sample and stamp `updated_at`.
+///
+/// Returns `true` when a non-deleted row with this guid was matched (even if the
+/// flag already had the requested value), `false` when no such row exists.
+pub async fn set_public(pool: &PgPool, guid: SampleGuid, value: bool) -> Result<bool, DbError> {
+    let outcome = sqlx::query(
+        "UPDATE core.biosample SET is_public = $2, updated_at = now() \
+         WHERE sample_guid = $1 AND deleted = false",
+    )
+    .bind(guid.0)
+    .bind(value)
+    .execute(pool)
+    .await?;
+    Ok(outcome.rows_affected() > 0)
+}
+
+/// Lookup by accession or alias (the private biosample search).
+///
+/// Performs a case-insensitive substring match on the trimmed `query` and
+/// returns at most 50 rows ordered by accession. The query text is matched
+/// literally: `%`, `_` and `\` are escaped so they cannot act as wildcards.
+pub async fn find_by_alias_or_accession(
+    pool: &PgPool,
+    query: &str,
+) -> Result, DbError> {
+    let needle = query.trim();
+    // Backslash is PostgreSQL's default LIKE/ILIKE escape character.
+    let mut like = String::with_capacity(needle.len() + 2);
+    like.push('%');
+    for ch in needle.chars() {
+        if matches!(ch, '\\' | '%' | '_') {
+            like.push('\\');
+        }
+        like.push(ch);
+    }
+    like.push('%');
+    let rows: Vec =
+        sqlx::query_as(&format!("{SELECT} AND (accession ILIKE $1 OR alias ILIKE $1) ORDER BY accession LIMIT 50"))
+            .bind(&like)
+            .fetch_all(pool)
+            .await?;
+    rows.into_iter().map(BiosampleRow::into_domain).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::conformance;
+
+    #[test]
+    fn conformance_prefers_cohort_norm_then_spec() {
+        // (mean, advertised spec, cohort norm, expected label)
+        let cases = [
+            // The cohort norm wins over the advertised spec: a sample at 28× whose
+            // test type's cohort delivers ~29× is AT — NOT flagged BELOW against an
+            // advertised 30× aligned bar (the D2C case the user called out).
+            (Some(28.0), Some(30.0), Some(29.0), Some("AT")),
+            // Genuinely under its cohort.
+            (Some(20.0), Some(30.0), Some(29.0), Some("BELOW")),
+            // Above its cohort.
+            (Some(33.0), None, Some(29.0), Some("ABOVE")),
+            // No cohort norm yet → fall back to the advertised spec.
+            (Some(20.0), Some(30.0), None, Some("BELOW")),
+            // Nothing to compare against.
+            (Some(30.0), None, None, None),
+            // No measurement at all.
+            (None, Some(30.0), Some(29.0), None),
+        ];
+        for (mean, expected, norm, want) in cases {
+            assert_eq!(conformance(mean, expected, norm).as_deref(), want);
+        }
+    }
+}
diff --git a/rust/crates/du-db/src/change_set.rs b/rust/crates/du-db/src/change_set.rs
new file mode 100644
index 00000000..587f4a4f
--- /dev/null
+++ b/rust/crates/du-db/src/change_set.rs
@@ -0,0 +1,597 @@
+//! Tree versioning: change-set lifecycle + apply engine.
+//!
+//! A change set groups proposed `tree_change` rows (CREATE/UPDATE/DELETE/
+//! REPARENT/VARIANT_EDIT). Curators review each change, then *apply* the set:
+//! approved changes are written to the production tree using the temporal edge
+//! model (close the current edge with `valid_until = now()`, open a new one).
+//!
+//! Lifecycle: DRAFT → READY_FOR_REVIEW → UNDER_REVIEW → APPLIED, with DISCARDED
+//! reachable from any non-applied state.
+//!
+//! Scope: this is the direct `tree_change` path — changes reference *existing*
+//! production haplogroup ids (REPARENT/UPDATE/DELETE/VARIANT_EDIT) or create a
+//! node under an existing parent (CREATE). The WIP staging path (placeholder ids
+//! + conflict resolutions) is produced by the merge algorithm and lands with it.
+
+use crate::{DbError, Page};
+use serde_json::Value;
+use sqlx::types::chrono::{DateTime, Utc};
+use sqlx::{PgPool, Postgres, Transaction};
+
+// ── views ────────────────────────────────────────────────────────────────────
+
+/// One row of `tree.change_set`, shaped for listings (see `CS_COLS`).
+#[derive(Debug, Clone, sqlx::FromRow, serde::Serialize)]
+pub struct ChangeSetSummary {
+    pub id: i64,
+    pub source: String,
+    /// DNA type the set targets, as text; `None` when the set is not typed.
+    pub haplogroup_type: Option<String>,
+    /// Lifecycle status as text (e.g. `DRAFT`, `UNDER_REVIEW`, `APPLIED`).
+    pub status: String,
+    pub description: Option<String>,
+    /// Number of changes authored into the set (maintained by `add_change`).
+    pub change_count: i64,
+    pub created_by: Option<String>,
+    pub created_at: DateTime<Utc>,
+    /// Who applied or discarded the set, once that has happened.
+    pub promoted_by: Option<String>,
+    /// When the set was applied.
+    pub promoted_at: Option<DateTime<Utc>>,
+}
+
+/// A proposed change as shown to reviewers, with the target node's name joined in.
+#[derive(Debug, Clone, sqlx::FromRow, serde::Serialize)]
+pub struct TreeChangeView {
+    pub id: i64,
+    /// `CREATE`, `UPDATE`, `DELETE`, `REPARENT` or `VARIANT_EDIT`, as text.
+    pub change_type: String,
+    /// Existing production node the change targets, when it has one.
+    pub haplogroup_id: Option<i64>,
+    /// Name of that node; `None` when there is no target or it no longer resolves.
+    pub haplogroup_name: Option<String>,
+    pub old_values: Option<Value>,
+    pub new_values: Option<Value>,
+    /// Per-change review status (e.g. `PENDING`, `APPROVED`, `REJECTED`, `APPLIED`).
+    pub status: String,
+}
+
+/// A reviewer comment attached to a change set.
+#[derive(Debug, Clone, sqlx::FromRow, serde::Serialize)]
+pub struct CommentView {
+    pub id: i64,
+    pub commented_by: String,
+    pub comment: String,
+    pub created_at: DateTime<Utc>,
+}
+
+/// Full view of one change set: its summary row, every change (ordered by id),
+/// and every comment (oldest first).
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct ChangeSetDetail {
+    pub summary: ChangeSetSummary,
+    pub changes: Vec<TreeChangeView>,
+    pub comments: Vec<CommentView>,
+}
+
+/// Per-category counts for a change-set diff. `diff` increments exactly one
+/// counter per non-rejected change.
+#[derive(Debug, Clone, Default, serde::Serialize)]
+pub struct DiffSummary {
+ // CREATE changes.
+ pub added: i64,
+ // DELETE changes.
+ pub removed: i64,
+ // Every change type other than CREATE / DELETE / REPARENT.
+ pub modified: i64,
+ // REPARENT changes.
+ pub reparented: i64,
+}
+
+/// One line of a change-set diff.
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct DiffEntry {
+ // `ADDED`, `REMOVED`, `REPARENTED` or `MODIFIED`.
+ pub diff_type: String,
+ // Target node name, else `new_values.name`, else `(unnamed)`.
+ pub name: String,
+ // JSON object with `change_type`, `old` and `new` keys.
+ pub detail: Value,
+}
+
+/// Result of `diff`: one entry per non-rejected change plus category totals.
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct TreeDiff {
+    pub entries: Vec<DiffEntry>,
+    pub summary: DiffSummary,
+}
+
+/// Tally of what `apply` did, one counter per change type. Each applied change
+/// increments exactly one field.
+#[derive(Debug, Clone, Default, serde::Serialize)]
+pub struct ApplyResult {
+ pub created: i64,
+ pub updated: i64,
+ pub deleted: i64,
+ pub reparented: i64,
+ pub variant_edits: i64,
+ // Changes whose `change_type` was not recognised; they are logged, not applied.
+ pub skipped: i64,
+}
+
+// Column list shared by every query that decodes a `ChangeSetSummary`. The
+// `::text` / `::bigint` casts make the selected columns decode into the
+// struct's `String` / `i64` fields.
+const CS_COLS: &str = "id, source, haplogroup_type::text AS haplogroup_type, status::text AS status, \
+ description, change_count::bigint AS change_count, created_by, created_at, promoted_by, promoted_at";
+
+// ── lifecycle ─────────────────────────────────────────────────────────────────
+
+/// Insert a new change set and return its id.
+///
+/// `haplogroup_type` is cast to `core.dna_type` by the database, so an unknown
+/// value surfaces as a database error. Status and counters take their column
+/// defaults.
+pub async fn create(
+    pool: &PgPool,
+    source: &str,
+    haplogroup_type: Option<&str>,
+    description: Option<&str>,
+    created_by: &str,
+) -> Result<i64, DbError> {
+    let id: i64 = sqlx::query_scalar(
+        "INSERT INTO tree.change_set (source, haplogroup_type, description, created_by) \
+         VALUES ($1, $2::core.dna_type, $3, $4) RETURNING id",
+    )
+    .bind(source)
+    .bind(haplogroup_type)
+    .bind(description)
+    .bind(created_by)
+    .fetch_one(pool)
+    .await?;
+    Ok(id)
+}
+
+/// Author a change within a set (also bumps `change_count`). Returns its id.
+///
+/// The insert and the counter bump share one transaction, so the count cannot
+/// drift from the rows if either statement fails. `change_type` is cast to
+/// `tree.tree_change_type` by the database.
+pub async fn add_change(
+    pool: &PgPool,
+    change_set_id: i64,
+    change_type: &str,
+    haplogroup_id: Option<i64>,
+    old_values: Option<&Value>,
+    new_values: Option<&Value>,
+) -> Result<i64, DbError> {
+    let mut tx = pool.begin().await?;
+    let id: i64 = sqlx::query_scalar(
+        "INSERT INTO tree.tree_change (change_set_id, change_type, haplogroup_id, old_values, new_values) \
+         VALUES ($1, $2::tree.tree_change_type, $3, $4, $5) RETURNING id",
+    )
+    .bind(change_set_id)
+    .bind(change_type)
+    .bind(haplogroup_id)
+    .bind(old_values)
+    .bind(new_values)
+    .fetch_one(&mut *tx)
+    .await?;
+    sqlx::query("UPDATE tree.change_set SET change_count = change_count + 1 WHERE id = $1")
+        .bind(change_set_id)
+        .execute(&mut *tx)
+        .await?;
+    tx.commit().await?;
+    Ok(id)
+}
+
+/// Page through change sets, newest first, optionally filtered by DNA type
+/// and/or status (a `None` filter matches everything).
+///
+/// `page_size` is clamped to `1..=200`; the returned `Page` reports the
+/// effective size and a page number of at least 1.
+pub async fn list(
+    pool: &PgPool,
+    haplogroup_type: Option<&str>,
+    status: Option<&str>,
+    page: i64,
+    page_size: i64,
+) -> Result<Page<ChangeSetSummary>, DbError> {
+    // Clamp first so LIMIT and OFFSET are derived from the same page size;
+    // otherwise an oversized request would skip rows between pages.
+    let limit = page_size.clamp(1, 200);
+    let offset = Page::<()>::offset(page, limit);
+    let where_sql = "WHERE ($1::text IS NULL OR haplogroup_type::text = $1) \
+                     AND ($2::text IS NULL OR status::text = $2)";
+    let total: i64 = sqlx::query_scalar(&format!("SELECT count(*) FROM tree.change_set {where_sql}"))
+        .bind(haplogroup_type)
+        .bind(status)
+        .fetch_one(pool)
+        .await?;
+    let items: Vec<ChangeSetSummary> = sqlx::query_as(&format!(
+        "SELECT {CS_COLS} FROM tree.change_set {where_sql} ORDER BY created_at DESC, id DESC LIMIT $3 OFFSET $4"
+    ))
+    .bind(haplogroup_type)
+    .bind(status)
+    .bind(limit)
+    .bind(offset)
+    .fetch_all(pool)
+    .await?;
+    Ok(Page { items, total, page: page.max(1), page_size: limit })
+}
+
+/// Load one change set with all of its changes and comments, or `None` when
+/// the id does not exist.
+///
+/// Changes come back in id order (including rejected ones); comments in
+/// creation order.
+pub async fn get(pool: &PgPool, id: i64) -> Result<Option<ChangeSetDetail>, DbError> {
+    let summary: Option<ChangeSetSummary> =
+        sqlx::query_as(&format!("SELECT {CS_COLS} FROM tree.change_set WHERE id = $1"))
+            .bind(id)
+            .fetch_optional(pool)
+            .await?;
+    let Some(summary) = summary else { return Ok(None) };
+
+    // LEFT JOIN: CREATE changes have no existing node to name.
+    let changes: Vec<TreeChangeView> = sqlx::query_as(
+        "SELECT tc.id, tc.change_type::text AS change_type, tc.haplogroup_id, h.name AS haplogroup_name, \
+         tc.old_values, tc.new_values, tc.status \
+         FROM tree.tree_change tc LEFT JOIN tree.haplogroup h ON h.id = tc.haplogroup_id \
+         WHERE tc.change_set_id = $1 ORDER BY tc.id",
+    )
+    .bind(id)
+    .fetch_all(pool)
+    .await?;
+
+    let comments: Vec<CommentView> = sqlx::query_as(
+        "SELECT id, commented_by, comment, created_at FROM tree.change_set_comment \
+         WHERE change_set_id = $1 ORDER BY created_at, id",
+    )
+    .bind(id)
+    .fetch_all(pool)
+    .await?;
+
+    Ok(Some(ChangeSetDetail { summary, changes, comments }))
+}
+
+/// Attach a comment by `by` to change set `id` and return the new comment's id.
+pub async fn add_comment(pool: &PgPool, id: i64, by: &str, comment: &str) -> Result<i64, DbError> {
+    let comment_id: i64 = sqlx::query_scalar(
+        "INSERT INTO tree.change_set_comment (change_set_id, commented_by, comment) VALUES ($1,$2,$3) RETURNING id",
+    )
+    .bind(id)
+    .bind(by)
+    .bind(comment)
+    .fetch_one(pool)
+    .await?;
+    Ok(comment_id)
+}
+
+/// DRAFT/READY_FOR_REVIEW -> UNDER_REVIEW.
+///
+/// Returns `true` when the transition happened, `false` when the set does not
+/// exist or is in any other state.
+pub async fn start_review(pool: &PgPool, id: i64) -> Result<bool, DbError> {
+    let outcome = sqlx::query(
+        "UPDATE tree.change_set SET status = 'UNDER_REVIEW' \
+         WHERE id = $1 AND status IN ('DRAFT','READY_FOR_REVIEW')",
+    )
+    .bind(id)
+    .execute(pool)
+    .await?;
+    Ok(outcome.rows_affected() > 0)
+}
+
+/// Any non-applied state -> DISCARDED.
+///
+/// Records `by` in `promoted_by` as the acting user. Returns `true` when a row
+/// was updated (this includes a set that was already DISCARDED), `false` when
+/// the set does not exist or has been applied.
+pub async fn discard(pool: &PgPool, id: i64, by: &str) -> Result<bool, DbError> {
+    let outcome = sqlx::query(
+        "UPDATE tree.change_set SET status = 'DISCARDED', promoted_by = $2 \
+         WHERE id = $1 AND status <> 'APPLIED'",
+    )
+    .bind(id)
+    .bind(by)
+    .execute(pool)
+    .await?;
+    Ok(outcome.rows_affected() > 0)
+}
+
+/// Set a single change's review status. `approve` -> APPROVED, else REJECTED.
+///
+/// The update is refused (returns `false`) when the change does not exist or
+/// its parent set is already APPLIED or DISCARDED.
+pub async fn review_change(pool: &PgPool, change_id: i64, approve: bool) -> Result<bool, DbError> {
+    let status = if approve { "APPROVED" } else { "REJECTED" };
+    let outcome = sqlx::query(
+        "UPDATE tree.tree_change SET status = $2 \
+         FROM tree.change_set cs \
+         WHERE tree_change.id = $1 AND tree_change.change_set_id = cs.id \
+         AND cs.status NOT IN ('APPLIED','DISCARDED')",
+    )
+    .bind(change_id)
+    .bind(status)
+    .execute(pool)
+    .await?;
+    Ok(outcome.rows_affected() > 0)
+}
+
+/// Approve all PENDING changes in a set that is neither APPLIED nor DISCARDED.
+///
+/// Returns the number of changes approved; `0` when the set is closed, missing,
+/// or has nothing pending. Already-reviewed changes are left untouched.
+pub async fn approve_all(pool: &PgPool, id: i64) -> Result<u64, DbError> {
+    let outcome = sqlx::query(
+        "UPDATE tree.tree_change SET status = 'APPROVED' \
+         FROM tree.change_set cs \
+         WHERE tree_change.change_set_id = $1 AND cs.id = $1 \
+         AND tree_change.status = 'PENDING' AND cs.status NOT IN ('APPLIED','DISCARDED')",
+    )
+    .bind(id)
+    .execute(pool)
+    .await?;
+    Ok(outcome.rows_affected())
+}
+
+// ── diff ──────────────────────────────────────────────────────────────────────
+
+/// Summarise what a change set would do to the tree.
+///
+/// Every change whose status is not REJECTED becomes one `DiffEntry` (in id
+/// order) and bumps one `DiffSummary` counter: CREATE -> added, DELETE ->
+/// removed, REPARENT -> reparented, anything else -> modified. An unknown set
+/// id simply yields an empty diff.
+pub async fn diff(pool: &PgPool, id: i64) -> Result<TreeDiff, DbError> {
+    let changes: Vec<TreeChangeView> = sqlx::query_as(
+        "SELECT tc.id, tc.change_type::text AS change_type, tc.haplogroup_id, h.name AS haplogroup_name, \
+         tc.old_values, tc.new_values, tc.status \
+         FROM tree.tree_change tc LEFT JOIN tree.haplogroup h ON h.id = tc.haplogroup_id \
+         WHERE tc.change_set_id = $1 AND tc.status <> 'REJECTED' ORDER BY tc.id",
+    )
+    .bind(id)
+    .fetch_all(pool)
+    .await?;
+
+    let mut summary = DiffSummary::default();
+    let mut entries = Vec::with_capacity(changes.len());
+    for change in changes {
+        // Take the row apart so each field can be moved without cloning.
+        let TreeChangeView { change_type, haplogroup_name, old_values, new_values, .. } = change;
+        // Prefer the existing node's name; a CREATE only has the proposed one.
+        let name = haplogroup_name
+            .or_else(|| new_values.as_ref().and_then(|v| jstr(v, "name")))
+            .unwrap_or_else(|| "(unnamed)".to_string());
+        let diff_type = match change_type.as_str() {
+            "CREATE" => {
+                summary.added += 1;
+                "ADDED"
+            }
+            "DELETE" => {
+                summary.removed += 1;
+                "REMOVED"
+            }
+            "REPARENT" => {
+                summary.reparented += 1;
+                "REPARENTED"
+            }
+            _ => {
+                summary.modified += 1;
+                "MODIFIED"
+            }
+        };
+        entries.push(DiffEntry {
+            diff_type: diff_type.to_string(),
+            name,
+            detail: serde_json::json!({ "change_type": change_type, "old": old_values, "new": new_values }),
+        });
+    }
+    Ok(TreeDiff { entries, summary })
+}
+
+// ── apply ─────────────────────────────────────────────────────────────────────
+
+/// Apply all APPROVED changes to the production tree (temporal model) and mark
+/// the set APPLIED. Idempotent on status: re-applying an APPLIED set is a no-op
+/// error. The whole apply runs in one transaction.
+///
+/// # Errors
+/// `DbError::Conflict` when the set does not exist or is not UNDER_REVIEW /
+/// READY_FOR_REVIEW, or when a change is malformed. Any error rolls the whole
+/// apply back, because the transaction is dropped uncommitted.
+pub async fn apply(pool: &PgPool, id: i64, by: &str) -> Result<ApplyResult, DbError> {
+    let mut tx = pool.begin().await?;
+
+    // Lock the set; gate on a reviewable status.
+    let (status, cs_dna): (String, Option<String>) = sqlx::query_as(
+        "SELECT status::text, haplogroup_type::text FROM tree.change_set WHERE id = $1 FOR UPDATE",
+    )
+    .bind(id)
+    .fetch_optional(&mut *tx)
+    .await?
+    .ok_or_else(|| DbError::Conflict(format!("change set {id} not found")))?;
+    if !matches!(status.as_str(), "UNDER_REVIEW" | "READY_FOR_REVIEW") {
+        return Err(DbError::Conflict(format!(
+            "change set must be UNDER_REVIEW or READY_FOR_REVIEW to apply (is {status})"
+        )));
+    }
+
+    let changes: Vec<TreeChangeRow> = sqlx::query_as(
+        "SELECT id, change_type::text AS change_type, haplogroup_id, new_values \
+         FROM tree.tree_change WHERE change_set_id = $1 AND status = 'APPROVED' ORDER BY id",
+    )
+    .bind(id)
+    .fetch_all(&mut *tx)
+    .await?;
+
+    let mut result = ApplyResult::default();
+    // Maps a CREATE's negative placeholder id to the real id it gets, so later
+    // changes in the set (children, reparents) can reference nodes created
+    // earlier in this same apply. Changes are ordered by id = insertion order =
+    // parent-before-child (the merge emits them that way).
+    let mut placeholders: std::collections::HashMap<i64, i64> = std::collections::HashMap::new();
+    for c in &changes {
+        apply_change(&mut tx, c, cs_dna.as_deref(), &mut placeholders, &mut result).await?;
+        // NOTE(review): a change skipped for an unknown type is also marked APPLIED here.
+        sqlx::query("UPDATE tree.tree_change SET status = 'APPLIED' WHERE id = $1")
+            .bind(c.id)
+            .execute(&mut *tx)
+            .await?;
+    }
+
+    sqlx::query("UPDATE tree.change_set SET status = 'APPLIED', promoted_by = $2, promoted_at = now() WHERE id = $1")
+        .bind(id)
+        .bind(by)
+        .execute(&mut *tx)
+        .await?;
+
+    // The applied change-set altered the served tree — bump the revision marker
+    // in-transaction so caches (the Edge ETag) revalidate. Atomic with the apply.
+    crate::tree_revision::bump(&mut *tx).await?;
+
+    tx.commit().await?;
+    Ok(result)
+}
+
+/// The subset of a `tree.tree_change` row that the apply engine needs.
+#[derive(sqlx::FromRow)]
+struct TreeChangeRow {
+    id: i64,
+    change_type: String,
+    /// Existing node the change targets; absent for CREATE.
+    haplogroup_id: Option<i64>,
+    /// Proposed values / instructions; absent is treated as JSON null.
+    new_values: Option<Value>,
+}
+
+/// Apply one approved change inside the caller's transaction and record it in
+/// `result`.
+///
+/// * `cs_dna` — the change set's DNA type, used when a CREATE omits its own.
+/// * `placeholders` — placeholder id -> real id for nodes created earlier in
+///   this apply; a CREATE carrying a `placeholder` registers itself here.
+///
+/// # Errors
+/// `DbError::Conflict` for a malformed change (missing `haplogroup_id`, name or
+/// DNA type, or a `formed_ybp` / `tmrca_ybp` outside the 32-bit range), plus any
+/// database error. Unknown change types are logged and counted as skipped.
+async fn apply_change(
+    tx: &mut Transaction<'_, Postgres>,
+    c: &TreeChangeRow,
+    cs_dna: Option<&str>,
+    placeholders: &mut std::collections::HashMap<i64, i64>,
+    result: &mut ApplyResult,
+) -> Result<(), DbError> {
+    // Year-before-present values are bound as `i32`; reject out-of-range JSON
+    // integers instead of silently wrapping them with an `as` cast.
+    fn ybp(nv: &Value, key: &str) -> Result<Option<i32>, DbError> {
+        jint(nv, key)
+            .map(|v| {
+                i32::try_from(v)
+                    .map_err(|_| DbError::Conflict(format!("{key} value {v} is out of range")))
+            })
+            .transpose()
+    }
+
+    let nv = c.new_values.clone().unwrap_or(Value::Null);
+    match c.change_type.as_str() {
+        "CREATE" => {
+            let name = jstr(&nv, "name")
+                .ok_or_else(|| DbError::Conflict("CREATE change missing new_values.name".into()))?;
+            let dna = jstr(&nv, "haplogroup_type")
+                .or_else(|| cs_dna.map(str::to_string))
+                .ok_or_else(|| DbError::Conflict("CREATE change has no haplogroup_type".into()))?;
+            let formed = ybp(&nv, "formed_ybp")?;
+            let tmrca = ybp(&nv, "tmrca_ybp")?;
+            // `is_backbone` / `provenance` are optional in new_values: the merge
+            // engine omits them (COALESCE keeps the column defaults), while the
+            // SNP-graft writer carries the source's curated backbone flag and a
+            // provenance record (source name, source_updated).
+            let new_id: i64 = sqlx::query_scalar(
+                "INSERT INTO tree.haplogroup (name, haplogroup_type, lineage, source, formed_ybp, tmrca_ybp, is_backbone, provenance) \
+                 VALUES ($1, $2::core.dna_type, $3, $4, $5, $6, COALESCE($7, false), COALESCE($8, '{}'::jsonb)) RETURNING id",
+            )
+            .bind(&name)
+            .bind(&dna)
+            .bind(jstr(&nv, "lineage"))
+            .bind(jstr(&nv, "source"))
+            .bind(formed)
+            .bind(tmrca)
+            .bind(jbool(&nv, "is_backbone"))
+            .bind(jval(&nv, "provenance"))
+            .fetch_one(&mut **tx)
+            .await?;
+            // Parent may be an existing id or a placeholder created earlier in
+            // this set; None makes a root (no parent edge).
+            let parent = resolve_ref(&nv, placeholders, "parent_haplogroup_id", "parent_placeholder")?;
+            if parent.is_some() {
+                open_edge(tx, new_id, parent, jstr(&nv, "source").as_deref()).await?;
+            }
+            for vid in jids(&nv, "variant_ids") {
+                link_variant(tx, new_id, vid).await?;
+            }
+            // Register this node so later changes can refer to it by placeholder.
+            if let Some(ph) = jint(&nv, "placeholder") {
+                placeholders.insert(ph, new_id);
+            }
+            result.created += 1;
+        }
+        "UPDATE" => {
+            let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("UPDATE change missing haplogroup_id".into()))?;
+            let formed = ybp(&nv, "formed_ybp")?;
+            let tmrca = ybp(&nv, "tmrca_ybp")?;
+            // COALESCE keeps existing values when a field is absent from new_values.
+            sqlx::query(
+                "UPDATE tree.haplogroup SET name = COALESCE($2, name), lineage = COALESCE($3, lineage), \
+                 source = COALESCE($4, source), formed_ybp = COALESCE($5, formed_ybp), \
+                 tmrca_ybp = COALESCE($6, tmrca_ybp) WHERE id = $1",
+            )
+            .bind(hid)
+            .bind(jstr(&nv, "name"))
+            .bind(jstr(&nv, "lineage"))
+            .bind(jstr(&nv, "source"))
+            .bind(formed)
+            .bind(tmrca)
+            .execute(&mut **tx)
+            .await?;
+            result.updated += 1;
+        }
+        "DELETE" => {
+            let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("DELETE change missing haplogroup_id".into()))?;
+            // Temporal delete: expire the node, then detach by closing all
+            // current edges + variant links. The tree-navigation queries
+            // (roots/children/subtree) exclude expired nodes.
+            sqlx::query("UPDATE tree.haplogroup SET valid_until = now() WHERE id = $1 AND valid_until IS NULL")
+                .bind(hid)
+                .execute(&mut **tx)
+                .await?;
+            close_current_edges_for(tx, hid).await?;
+            sqlx::query(
+                "UPDATE tree.haplogroup_variant SET valid_until = now() \
+                 WHERE haplogroup_id = $1 AND valid_until IS NULL",
+            )
+            .bind(hid)
+            .execute(&mut **tx)
+            .await?;
+            result.deleted += 1;
+        }
+        "REPARENT" => {
+            let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("REPARENT change missing haplogroup_id".into()))?;
+            // Accept the explicit new-parent keys first, then the plain
+            // `parent_haplogroup_id` spelling.
+            let new_parent =
+                resolve_ref(&nv, placeholders, "new_parent_haplogroup_id", "new_parent_placeholder")?
+                    .or_else(|| jint(&nv, "parent_haplogroup_id"));
+            // Close the current parent edge, then open the new one.
+            sqlx::query(
+                "UPDATE tree.haplogroup_relationship SET valid_until = now() \
+                 WHERE child_haplogroup_id = $1 AND valid_until IS NULL",
+            )
+            .bind(hid)
+            .execute(&mut **tx)
+            .await?;
+            // NOTE(review): unlike CREATE, an edge row is opened even when no
+            // parent resolves (NULL parent) — confirm that is the intended root form.
+            open_edge(tx, hid, new_parent, jstr(&nv, "source").as_deref()).await?;
+            result.reparented += 1;
+        }
+        "VARIANT_EDIT" => {
+            let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("VARIANT_EDIT change missing haplogroup_id".into()))?;
+            for vid in jids(&nv, "add") {
+                link_variant(tx, hid, vid).await?;
+            }
+            let remove = jids(&nv, "remove");
+            if !remove.is_empty() {
+                // Removal is temporal: the link is expired, not deleted.
+                sqlx::query(
+                    "UPDATE tree.haplogroup_variant SET valid_until = now() \
+                     WHERE haplogroup_id = $1 AND variant_id = ANY($2) AND valid_until IS NULL",
+                )
+                .bind(hid)
+                .bind(&remove)
+                .execute(&mut **tx)
+                .await?;
+            }
+            result.variant_edits += 1;
+        }
+        other => {
+            tracing::warn!(change_type = other, "unknown tree_change type; skipped");
+            result.skipped += 1;
+        }
+    }
+    Ok(())
+}
+
+/// Open a new current edge (child under parent). `parent` None makes a root.
+///
+/// `source` is stored on the edge as given. Existing edges are not closed here;
+/// callers expire the old edge first when re-parenting.
+async fn open_edge(
+    tx: &mut Transaction<'_, Postgres>,
+    child: i64,
+    parent: Option<i64>,
+    source: Option<&str>,
+) -> Result<(), DbError> {
+    sqlx::query(
+        "INSERT INTO tree.haplogroup_relationship (child_haplogroup_id, parent_haplogroup_id, source) \
+         VALUES ($1, $2, $3)",
+    )
+    .bind(child)
+    .bind(parent)
+    .bind(source)
+    .execute(&mut **tx)
+    .await?;
+    Ok(())
+}
+
+/// Expire every open edge the node takes part in, whether it is the child or
+/// the parent side of the relationship.
+async fn close_current_edges_for(tx: &mut Transaction<'_, Postgres>, hid: i64) -> Result<(), DbError> {
+    const CLOSE_EDGES: &str = "UPDATE tree.haplogroup_relationship SET valid_until = now() \
+        WHERE (child_haplogroup_id = $1 OR parent_haplogroup_id = $1) AND valid_until IS NULL";
+    sqlx::query(CLOSE_EDGES).bind(hid).execute(&mut **tx).await?;
+    Ok(())
+}
+
+/// Associate a variant with a haplogroup; an insert that hits a conflict is
+/// silently ignored, so re-linking the same pair is not an error.
+async fn link_variant(tx: &mut Transaction<'_, Postgres>, hid: i64, vid: i64) -> Result<(), DbError> {
+    const LINK: &str = "INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1, $2) \
+        ON CONFLICT DO NOTHING";
+    sqlx::query(LINK).bind(hid).bind(vid).execute(&mut **tx).await?;
+    Ok(())
+}
+
+// ── small JSON helpers ────────────────────────────────────────────────────────
+
+/// Resolve a node reference that may be an existing id (`id_key`) or a
+/// placeholder (`ph_key`) created earlier in this apply. A placeholder with no
+/// mapping (its CREATE was rejected/not applied) is an unsatisfied dependency.
+fn resolve_ref(
+ nv: &Value,
+ placeholders: &std::collections::HashMap,
+ id_key: &str,
+ ph_key: &str,
+) -> Result